diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index be38f38..3e37fbd 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2827,6 +2827,15 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were chosen for good performance\n" +
         "2. all : use VectorUDFAdaptor for all UDFs"
     ),
+    HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED("hive.vectorized.complex.types.enabled", true,
+        "This flag should be set to true to enable vectorization\n" +
+        "of expressions with complex types.\n" +
+        "The default value is true."),
+    HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED("hive.vectorized.groupby.complex.types.enabled", true,
+        "This flag should be set to true to enable group by vectorization\n" +
+        "of aggregations that use complex types.\n" +
+        "For example, AVG uses a complex type (STRUCT) for partial aggregation results.\n" +
+        "The default value is true."),
     HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " +
         "whether to check, convert, and normalize partition value to conform to its column type in "
diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 07fd5bf..5eb9e30 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -623,6 +623,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   union_remove_26.q,\
   union_top_level.q,\
   vector_auto_smb_mapjoin_14.q,\
+  vector_complex_all.q,\
   vector_decimal_2.q,\
   vector_decimal_udf.q,\
   vector_groupby_cube1.q,\
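The two new flags gate complex-type vectorization independently: `hive.vectorized.complex.types.enabled` covers expressions over complex types (STRUCT, LIST, MAP) anywhere in a vectorized plan, while `hive.vectorized.groupby.complex.types.enabled` specifically covers GROUP BY aggregations whose partial result is complex; AVG is the motivating case because its PARTIAL1 output is a STRUCT of (count, sum, input). A minimal sketch of how a caller might consult the flags; the helper class and method names below are illustrative only, while `HiveConf.getBoolVar(HiveConf.ConfVars)` is the standard accessor:

```java
import org.apache.hadoop.hive.conf.HiveConf;

public class ComplexTypeVectorizationCheck {

  // Whether expressions over complex types may be vectorized at all.
  public static boolean allowComplexTypes(HiveConf conf) {
    return conf.getBoolVar(
        HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED);
  }

  // Whether GROUP BY aggregations whose partial result is complex may be
  // vectorized; AVG's PARTIAL1 output is a STRUCT (count, sum, input).
  public static boolean allowGroupByComplexTypes(HiveConf conf) {
    return conf.getBoolVar(
        HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED);
  }
}
```

Either flag can be toggled per session, e.g. `set hive.vectorized.groupby.complex.types.enabled=false;`, to fall back to row-mode execution when validating results.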
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
index 46cbb5b..a463373 100644
--- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvg.txt
@@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -38,6 +39,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
+import com.google.common.base.Preconditions;
+
 /**
  * Generated from template VectorUDAFAvg.txt.
  */
@@ -46,7 +49,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
 public class <ClassName> extends VectorAggregateExpression {
 
   private static final long serialVersionUID = 1L;
-
+
   /** class for storing the current aggregate value. */
   static class Aggregation implements AggregationBuffer {
@@ -59,10 +62,10 @@ public class <ClassName> extends VectorAggregateExpression {
     * Value is explicitly (re)initialized in reset()
     */
     transient private boolean isNull = true;
-
-    public void sumValue(<ValueType> value) {
+
+    public void avgValue(<ValueType> value) {
       if (isNull) {
-        sum = value;
+        sum = value;
         count = 1;
         isNull = false;
       } else {
@@ -75,7 +78,7 @@ public class <ClassName> extends VectorAggregateExpression {
     public int getVariableSize() {
       throw new UnsupportedOperationException();
     }
-
+
     @Override
     public void reset () {
       isNull = true;
@@ -83,44 +86,65 @@ public class <ClassName> extends VectorAggregateExpression {
       count = 0L;
     }
   }
-
-  private VectorExpression inputExpression;
-
-  @Override
-  public VectorExpression inputExpression() {
-    return inputExpression;
-  }
+#IF PARTIAL1
   transient private Object[] partialResult;
   transient private LongWritable resultCount;
   transient private DoubleWritable resultSum;
+  transient private DoubleWritable resultInput;
   transient private StructObjectInspector soi;
-
-  public <ClassName>(VectorExpression inputExpression) {
-    this();
-    this.inputExpression = inputExpression;
+#ENDIF PARTIAL1
+#IF COMPLETE
+  transient private DoubleWritable fullResult;
+  transient private ObjectInspector oi;
+#ENDIF COMPLETE
+
+  public <ClassName>(VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) {
+    super(inputExpression, mode);
+#IF PARTIAL1
+    Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL1);
+#ENDIF PARTIAL1
+#IF COMPLETE
+    Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.COMPLETE);
+#ENDIF COMPLETE
   }
 
-  public <ClassName>() {
-    super();
-    partialResult = new Object[2];
+  private void init() {
+#IF PARTIAL1
+    partialResult = new Object[3];
     resultCount = new LongWritable();
     resultSum = new DoubleWritable();
+    resultInput = new DoubleWritable();
     partialResult[0] = resultCount;
     partialResult[1] = resultSum;
+    partialResult[2] = resultInput;
     initPartialResultInspector();
+#ENDIF PARTIAL1
+#IF COMPLETE
+    fullResult = new DoubleWritable();
+    initFullResultInspector();
+#ENDIF COMPLETE
   }
 
+#IF PARTIAL1
   private void initPartialResultInspector() {
     List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
     foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
     foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+    foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
     List<String> fname = new ArrayList<String>();
     fname.add("count");
     fname.add("sum");
+    fname.add("input");
     soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
   }
-
+#ENDIF PARTIAL1
+#IF COMPLETE
+  private void initFullResultInspector() {
+    oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+  }
+#ENDIF COMPLETE
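Each template instantiation is pinned to one evaluator mode, which is why the constructor asserts PARTIAL1 or COMPLETE: PARTIAL1 emits its running state as a three-field struct matching the row-mode GenericUDAFAverage layout (the `input` slot is carried only so windowing sees the same struct shape), while COMPLETE performs the final division itself. A minimal plain-Java sketch of the two output shapes, an illustration rather than the generated class:

```java
public class AvgModesSketch {
  private long count;
  private double sum;
  private boolean isNull = true;

  public void avgValue(double value) {
    if (isNull) {
      sum = value;
      count = 1;
      isNull = false;
    } else {
      sum += value;
      count++;
    }
  }

  /** PARTIAL1-style output: the {count, sum, input} struct merged downstream. */
  public Object[] evaluatePartial1() {
    return isNull ? null : new Object[] { count, sum, null };
  }

  /** COMPLETE-style output: the finished average. */
  public Double evaluateComplete() {
    return isNull ? null : sum / count;
  }
}
```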
+
   private Aggregation getCurrentAggregationBuffer(
       VectorAggregationBufferRow[] aggregationBufferSets,
       int bufferIndex,
@@ -129,21 +153,21 @@ public class <ClassName> extends VectorAggregateExpression {
     Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex);
     return myagg;
   }
-
+
   @Override
   public void aggregateInputSelection(
     VectorAggregationBufferRow[] aggregationBufferSets,
-    int bufferIndex,
+    int bufferIndex,
     VectorizedRowBatch batch) throws HiveException {
-
+
     int batchSize = batch.size;
-
+
     if (batchSize == 0) {
       return;
     }
-
+
     inputExpression.evaluate(batch);
-
+
     <InputColumnVectorType> inputVector = (<InputColumnVectorType>)batch.
         cols[this.inputExpression.getOutputColumn()];
     <ValueType>[] vector = inputVector.vector;
@@ -197,12 +221,12 @@ public class <ClassName> extends VectorAggregateExpression {
     for (int i=0; i < batchSize; ++i) {
       Aggregation myagg = getCurrentAggregationBuffer(
-        aggregationBufferSets,
+        aggregationBufferSets,
         bufferIndex,
         i);
-      myagg.sumValue(value);
+      myagg.avgValue(value);
     }
-  }
+  }
 
   private void iterateNoNullsSelectionWithAggregationSelection(
     VectorAggregationBufferRow[] aggregationBufferSets,
@@ -210,13 +234,13 @@ public class <ClassName> extends VectorAggregateExpression {
     <ValueType>[] values,
     int[] selection,
     int batchSize) {
-
+
     for (int i=0; i < batchSize; ++i) {
       Aggregation myagg = getCurrentAggregationBuffer(
-        aggregationBufferSets,
+        aggregationBufferSets,
         bufferIndex,
         i);
-      myagg.sumValue(values[selection[i]]);
+      myagg.avgValue(values[selection[i]]);
     }
   }
 
@@ -227,10 +251,10 @@ public class <ClassName> extends VectorAggregateExpression {
     int batchSize) {
     for (int i=0; i < batchSize; ++i) {
       Aggregation myagg = getCurrentAggregationBuffer(
-        aggregationBufferSets,
+        aggregationBufferSets,
         bufferIndex,
         i);
-      myagg.sumValue(values[i]);
+      myagg.avgValue(values[i]);
     }
   }
 
@@ -245,15 +269,15 @@ public class <ClassName> extends VectorAggregateExpression {
     if (isNull[0]) {
       return;
     }
-
+
     for (int i=0; i < batchSize; ++i) {
       Aggregation myagg = getCurrentAggregationBuffer(
         aggregationBufferSets,
         bufferIndex,
         i);
-      myagg.sumValue(value);
+      myagg.avgValue(value);
     }
-
+
   }
 
   private void iterateHasNullsRepeatingWithAggregationSelection(
@@ -272,7 +296,7 @@ public class <ClassName> extends VectorAggregateExpression {
         aggregationBufferSets,
         bufferIndex,
         i);
-      myagg.sumValue(value);
+      myagg.avgValue(value);
     }
   }
 
@@ -288,10 +312,10 @@ public class <ClassName> extends VectorAggregateExpression {
       int i = selection[j];
       if (!isNull[i]) {
         Aggregation myagg = getCurrentAggregationBuffer(
-          aggregationBufferSets,
+          aggregationBufferSets,
           bufferIndex,
           j);
-        myagg.sumValue(values[i]);
+        myagg.avgValue(values[i]);
       }
     }
   }
 
@@ -306,68 +330,64 @@ public class <ClassName> extends VectorAggregateExpression {
     for (int i=0; i < batchSize; ++i) {
       if (!isNull[i]) {
         Aggregation myagg = getCurrentAggregationBuffer(
-          aggregationBufferSets,
+          aggregationBufferSets,
           bufferIndex,
           i);
-        myagg.sumValue(values[i]);
+        myagg.avgValue(values[i]);
       }
     }
   }
 
-  @Override
-  public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
+  public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch)
   throws HiveException {
-
-    inputExpression.evaluate(batch);
-
-    <InputColumnVectorType> inputVector =
-      (<InputColumnVectorType>)batch.cols[this.inputExpression.getOutputColumn()];
-
-    int batchSize = batch.size;
-
-    if (batchSize == 0) {
-      return;
-    }
-
-    Aggregation myagg = (Aggregation)agg;
-
-    <ValueType>[] vector = inputVector.vector;
-
-    if (inputVector.isRepeating) {
-      if (inputVector.noNulls) {
-        if (myagg.isNull) {
-          myagg.isNull = false;
-          myagg.sum = 0;
-          myagg.count = 0;
-        }
-        myagg.sum += vector[0]*batchSize;
-        myagg.count += batchSize;
+
+    inputExpression.evaluate(batch);
+
+    <InputColumnVectorType> inputVector =
+      (<InputColumnVectorType>)batch.cols[this.inputExpression.getOutputColumn()];
+
+    int batchSize = batch.size;
+
+    if (batchSize == 0) {
+      return;
+    }
+
+    Aggregation myagg = (Aggregation)agg;
+
+    <ValueType>[] vector = inputVector.vector;
+
+    if (inputVector.isRepeating) {
+      if (inputVector.noNulls) {
+        if (myagg.isNull) {
+          myagg.isNull = false;
+          myagg.sum = 0;
+          myagg.count = 0;
        }
-        return;
-      }
-
-      if (!batch.selectedInUse && inputVector.noNulls) {
-        iterateNoSelectionNoNulls(myagg, vector, batchSize);
-      }
-      else if (!batch.selectedInUse) {
-        iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
-      }
-      else if (inputVector.noNulls){
-        iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
-      }
-      else {
-        iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+        myagg.sum += vector[0]*batchSize;
+        myagg.count += batchSize;
       }
+      return;
+    }
+
+    if (!batch.selectedInUse && inputVector.noNulls) {
+      iterateNoSelectionNoNulls(myagg, vector, batchSize);
+    } else if (!batch.selectedInUse) {
+      iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull);
+    } else if (inputVector.noNulls){
+      iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected);
+    } else {
+      iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected);
+    }
   }
-
+
   private void iterateSelectionHasNulls(
-    Aggregation myagg,
-    <ValueType>[] vector,
+    Aggregation myagg,
+    <ValueType>[] vector,
     int batchSize,
-    boolean[] isNull,
+    boolean[] isNull,
     int[] selected) {
-
+
     for (int j=0; j< batchSize; ++j) {
       int i = selected[j];
       if (!isNull[i]) {
@@ -384,17 +404,17 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private void iterateSelectionNoNulls(
-    Aggregation myagg,
-    <ValueType>[] vector,
-    int batchSize,
+    Aggregation myagg,
+    <ValueType>[] vector,
+    int batchSize,
     int[] selected) {
-
+
     if (myagg.isNull) {
       myagg.isNull = false;
       myagg.sum = 0;
       myagg.count = 0;
     }
-
+
     for (int i=0; i< batchSize; ++i) {
       <ValueType> value = vector[selected[i]];
       myagg.sum += value;
@@ -403,15 +423,15 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private void iterateNoSelectionHasNulls(
-    Aggregation myagg,
-    <ValueType>[] vector,
+    Aggregation myagg,
+    <ValueType>[] vector,
     int batchSize,
     boolean[] isNull) {
-
+
     for(int i=0;i<batchSize;++i) {
       if (!isNull[i]) {
         <ValueType> value = vector[i];
-        if (myagg.isNull) {
+        if (myagg.isNull) {
           myagg.isNull = false;
           myagg.sum = 0;
           myagg.count = 0;
@@ -423,15 +443,15 @@ public class <ClassName> extends VectorAggregateExpression {
   }
 
   private void iterateNoSelectionNoNulls(
-    Aggregation myagg,
-    <ValueType>[] vector,
+    Aggregation myagg,
+    <ValueType>[] vector,
     int batchSize) {
     if (myagg.isNull) {
       myagg.isNull = false;
       myagg.sum = 0;
       myagg.count = 0;
     }
-
+
     for (int i=0;i<batchSize;++i) {
       <ValueType> value = vector[i];
       myagg.sum += value;
@@ -456,19 +476,29 @@ public class <ClassName> extends VectorAggregateExpression {
     Aggregation myagg = (Aggregation) agg;
     if (myagg.isNull) {
       return null;
-    }
-    else {
-      assert(0 < myagg.count);
+    } else {
+      Preconditions.checkState(myagg.count > 0);
+#IF PARTIAL1
       resultCount.set (myagg.count);
       resultSum.set (myagg.sum);
       return partialResult;
+#ENDIF PARTIAL1
+#IF COMPLETE
+      fullResult.set (myagg.sum / myagg.count);
+      return fullResult;
+#ENDIF COMPLETE
     }
   }
-
+
   @Override
-  public ObjectInspector getOutputObjectInspector() {
+  public ObjectInspector getOutputObjectInspector() {
+#IF PARTIAL1
     return soi;
-  }
+#ENDIF PARTIAL1
+#IF COMPLETE
+    return oi;
+#ENDIF COMPLETE
+  }
 
   @Override
   public long getAggregationBufferFixedSize() {
@@ -481,15 +511,6 @@ public class <ClassName> extends VectorAggregateExpression {
   @Override
   public void init(AggregationDesc desc) throws HiveException {
-    // No-op
+    init();
   }
-
-  public VectorExpression getInputExpression() {
-    return inputExpression;
-  }
-
-  public void setInputExpression(VectorExpression inputExpression) {
-    this.inputExpression = inputExpression;
-  }
-}
-
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt
new file mode 100644
index 0000000..fa7b7c7
--- /dev/null
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimal.txt
@@ -0,0 +1,566 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage.GenericUDAFAverageEvaluatorDecimal; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.base.Preconditions; + +/** + * Generated from template VectorUDAFAvg.txt. + */ +@Description(name = "avg", + value = "_FUNC_(AVG) - Returns the average value of expr (vectorized, type: decimal)") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** class for storing the current aggregate value. */ + static class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private final HiveDecimalWritable sum = new HiveDecimalWritable(); + transient private long count; + transient private boolean isNull; + + public void avgValue(HiveDecimalWritable writable) { + if (isNull) { + // Make a copy since we intend to mutate sum. + sum.set(writable); + count = 1; + isNull = false; + } else { + // Note that if sum is out of range, mutateAdd will ignore the call. + // At the end, sum.isSet() can be checked for null. 
+ sum.mutateAdd(writable); + count++; + } + } + + public void avgValueNoNullCheck(HiveDecimalWritable writable) { + sum.mutateAdd(writable); + count++; + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + isNull = true; + sum.setFromLong(0L); + count = 0; + } + } + +#IF PARTIAL1 + transient private Object[] partialResult; + transient private LongWritable resultCount; + transient private HiveDecimalWritable resultSum; + transient private HiveDecimalWritable resultInput; + transient private StructObjectInspector soi; +#ENDIF PARTIAL1 +#IF COMPLETE + transient private HiveDecimalWritable tempDecWritable; + transient private HiveDecimalWritable fullResult; + transient private ObjectInspector oi; +#ENDIF COMPLETE + + /** + * The scale of the SUM in the partial output + */ + private int sumScale; + + /** + * The precision of the SUM in the partial output + */ + private int sumPrecision; + + /** + * the scale of the input expression + */ + private int inputScale; + + /** + * the precision of the input expression + */ + private int inputPrecision; + + public (VectorExpression inputExpression, + GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); +#IF PARTIAL1 + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL1); +#ENDIF PARTIAL1 +#IF COMPLETE + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.COMPLETE); +#ENDIF COMPLETE + } + + private void init() { +#IF PARTIAL1 + partialResult = new Object[3]; + resultCount = new LongWritable(); + resultSum = new HiveDecimalWritable(); + resultInput = new HiveDecimalWritable(0L); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + partialResult[2] = resultInput; +#ENDIF PARTIAL1 +#IF COMPLETE + tempDecWritable = new HiveDecimalWritable(); + fullResult = new HiveDecimalWritable(); +#ENDIF COMPLETE + } + +#IF PARTIAL1 + private void initPartialResultInspector() { +#ENDIF PARTIAL1 +#IF COMPLETE + private void initFullResultInspector() { +#ENDIF COMPLETE + // the output type of the vectorized partial aggregate must match the + // expected type for the row-mode aggregation + // For decimal, the type is "same number of integer digits and 4 more decimal digits" + + DecimalTypeInfo decTypeInfo = + GenericUDAFAverageEvaluatorDecimal.deriveResultDecimalTypeInfo( + inputPrecision, inputScale, mode); + this.sumScale = decTypeInfo.scale(); + this.sumPrecision = decTypeInfo.precision(); + +#IF PARTIAL1 + List foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(decTypeInfo)); + foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(decTypeInfo)); + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("input"); + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); +#ENDIF PARTIAL1 +#IF COMPLETE + oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(decTypeInfo); +#ENDIF COMPLETE + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + 
VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + DecimalColumnVector inputVector = (DecimalColumnVector) batch. + cols[this.inputExpression.getOutputColumn()]; + HiveDecimalWritable[] vector = inputVector.vector; + + if (inputVector.noNulls) { + if (inputVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batchSize); + } + } + } else { + if (inputVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector[0], batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector[0], batchSize, inputVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batchSize, batch.selected, inputVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + vector, batchSize, inputVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.avgValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable[] values, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.avgValue(values[selection[i]]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable[] values, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.avgValue(values[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable value, + int batchSize, + int[] selection, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.avgValue(value); + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable value, + int batchSize, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.avgValue(value); + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + 
VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable[] values, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + j); + myagg.avgValue(values[i]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + HiveDecimalWritable[] values, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.avgValue(values[i]); + } + } + } + + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + DecimalColumnVector inputVector = + (DecimalColumnVector)batch.cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + HiveDecimalWritable[] vector = inputVector.vector; + + if (inputVector.isRepeating) { + if (inputVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum.setFromLong(0L); + myagg.count = 0; + } + HiveDecimal value = vector[0].getHiveDecimal(); + HiveDecimal multiple = value.multiply(HiveDecimal.create(batchSize)); + myagg.sum.mutateAdd(multiple); + myagg.count += batchSize; + } + return; + } + + if (!batch.selectedInUse && inputVector.noNulls) { + iterateNoSelectionNoNulls(myagg, vector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); + } + else if (inputVector.noNulls){ + iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + HiveDecimalWritable[] vector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + myagg.avgValue(vector[i]); + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + HiveDecimalWritable[] vector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum.setFromLong(0L); + myagg.count = 0; + } + + for (int i=0; i< batchSize; ++i) { + myagg.avgValueNoNullCheck(vector[selected[i]]); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + HiveDecimalWritable[] vector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i 0); +#IF PARTIAL1 + resultCount.set (myagg.count); + resultSum.set(myagg.sum); + return partialResult; +#ENDIF PARTIAL1 +#IF COMPLETE + tempDecWritable.setFromLong (myagg.count); + fullResult.set(myagg.sum); + fullResult.mutateDivide(tempDecWritable); + fullResult.mutateEnforcePrecisionScale(sumPrecision, sumScale); + return fullResult; +#ENDIF COMPLETE + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { +#IF PARTIAL1 + return soi; +#ENDIF PARTIAL1 +#IF COMPLETE + return oi; +#ENDIF COMPLETE + } + + @Override + public long getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2() * 2, + model.memoryAlign()); + } + + @Override + public void 
init(AggregationDesc desc) throws HiveException { + init(); + + ExprNodeDesc inputExpr = desc.getParameters().get(0); + DecimalTypeInfo tiInput = (DecimalTypeInfo) inputExpr.getTypeInfo(); + this.inputScale = tiInput.scale(); + this.inputPrecision = tiInput.precision(); + +#IF PARTIAL1 + initPartialResultInspector(); +#ENDIF PARTIAL1 +#IF COMPLETE + initFullResultInspector(); +#ENDIF COMPLETE + } +} + diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt new file mode 100644 index 0000000..071efc9 --- /dev/null +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgDecimalMerge.txt @@ -0,0 +1,597 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage.GenericUDAFAverageEvaluatorDecimal; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +import com.google.common.base.Preconditions; + +/** + * Generated from 
template VectorUDAFAvg.txt. + */ +@Description(name = "avg", + value = "_FUNC_(AVG) - Returns the average value of expr (vectorized, type: decimal)") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** class for storing the current aggregate value. */ + static class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private final HiveDecimalWritable mergeSum = new HiveDecimalWritable(); + transient private long mergeCount; + transient private boolean isNull; + + public void merge(long count, HiveDecimalWritable sum) { + if (isNull) { + // Make a copy since we intend to mutate sum. + mergeCount = count; + mergeSum.set(sum); + isNull = false; + } else { + // Note that if sum is out of range, mutateAdd will ignore the call. + // At the end, sum.isSet() can be checked for null. + mergeCount += count; + mergeSum.mutateAdd(sum); + } + } + + public void mergeNoNullCheck(long count, HiveDecimalWritable sum) { + mergeCount += count; + mergeSum.mutateAdd(sum); + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset() { + isNull = true; + mergeCount = 0; + mergeSum.setFromLong(0L); + } + } + +#IF PARTIAL2 + transient private Object[] partialResult; + transient private LongWritable resultCount; + transient private HiveDecimalWritable resultSum; + transient private HiveDecimalWritable resultInput; + transient private StructObjectInspector soi; +#ENDIF PARTIAL2 +#IF FINAL + transient private HiveDecimalWritable tempDecWritable; + transient private HiveDecimalWritable fullResult; + transient private ObjectInspector oi; +#ENDIF FINAL + + private transient int countOffset; + private transient int sumOffset; + private transient int inputOffset; + + /** + * The scale of the SUM in the partial output + */ + private int sumScale; + + /** + * The precision of the SUM in the partial output + */ + private int sumPrecision; + + /** + * the scale of the input expression + */ + private int inputScale; + + /** + * the precision of the input expression + */ + private int inputPrecision; + + public (VectorExpression inputExpression, + GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); +#IF PARTIAL2 + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL2); +#ENDIF PARTIAL2 +#IF FINAL + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.FINAL); +#ENDIF FINAL + } + + private void init() { +#IF PARTIAL2 + partialResult = new Object[3]; + resultCount = new LongWritable(); + resultSum = new HiveDecimalWritable(); + resultInput = new HiveDecimalWritable(0L); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + partialResult[2] = resultInput; +#ENDIF PARTIAL2 +#IF FINAL + tempDecWritable = new HiveDecimalWritable(); + fullResult = new HiveDecimalWritable(); +#ENDIF FINAL + } + +#IF PARTIAL2 + private void initPartialResultInspector() { +#ENDIF PARTIAL2 +#IF FINAL + private void initFullResultInspector() { +#ENDIF FINAL + + // the output type of the vectorized partial aggregate must match the + // expected type for the row-mode aggregation + // For decimal, the type is "same number of integer digits and 4 more decimal digits" + + DecimalTypeInfo decTypeInfo = + GenericUDAFAverageEvaluatorDecimal.deriveResultDecimalTypeInfo( + inputPrecision, inputScale, mode); + this.sumScale = decTypeInfo.scale(); + this.sumPrecision = decTypeInfo.precision(); + +#IF PARTIAL2 + List foi = 
new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(decTypeInfo)); + foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(decTypeInfo)); + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("input"); + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); +#ENDIF PARTIAL2 +#IF FINAL + oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(decTypeInfo); +#ENDIF FINAL + } + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + StructColumnVector inputStructColVector = + (StructColumnVector) batch.cols[this.inputExpression.getOutputColumn()]; + ColumnVector[] fields = inputStructColVector.fields; + + long[] countVector = ((LongColumnVector) fields[countOffset]).vector; + HiveDecimalWritable[] sumVector = ((DecimalColumnVector) fields[sumOffset]).vector; + + if (inputStructColVector.noNulls) { + if (inputStructColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batchSize); + } + } + } else { + if (inputStructColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], batchSize, batch.selected, inputStructColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], batchSize, inputStructColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batchSize, batch.selected, inputStructColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batchSize, inputStructColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + HiveDecimalWritable sum, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + HiveDecimalWritable[] sumVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = 
getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + final int batchIndex = selection[i]; + myagg.merge(countVector[batchIndex], sumVector[batchIndex]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[i], sumVector[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + HiveDecimalWritable sum, + int batchSize, + int[] selection, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum); + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + HiveDecimalWritable sum, + int batchSize, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum); + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i = 0; i < batchSize; i++) { + final int batchIndex = selection[i]; + if (!isNull[batchIndex]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[batchIndex], sumVector[batchIndex]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[i], sumVector[i]); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + StructColumnVector inputStructColVector = + (StructColumnVector) batch.cols[this.inputExpression.getOutputColumn()]; + ColumnVector[] fields = inputStructColVector.fields; + + long[] countVector = ((LongColumnVector) fields[countOffset]).vector; + HiveDecimalWritable[] sumVector = ((DecimalColumnVector) fields[sumOffset]).vector; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputStructColVector.isRepeating) { + if (inputStructColVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeSum.setFromLong(0L); + myagg.mergeCount = 0; + } + myagg.mergeCount += countVector[0] * batchSize; + HiveDecimal sum = sumVector[0].getHiveDecimal(); + HiveDecimal multiple = sum.multiply(HiveDecimal.create(batchSize)); + myagg.mergeSum.mutateAdd(multiple); + } + return; + } + + if (!batch.selectedInUse && inputStructColVector.noNulls) { + 
iterateNoSelectionNoNulls(myagg, countVector, sumVector, batchSize); + } else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, countVector, sumVector, batchSize, inputStructColVector.isNull); + } else if (inputStructColVector.noNulls){ + iterateSelectionNoNulls(myagg, countVector, sumVector, batchSize, batch.selected); + } else { + iterateSelectionHasNulls(myagg, countVector, sumVector, batchSize, inputStructColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int i = 0; i < batchSize; i++) { + final int batchIndex = selected[i]; + if (!isNull[batchIndex]) { + myagg.merge(countVector[batchIndex], sumVector[batchIndex]); + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeSum.setFromLong(0L); + myagg.mergeCount = 0; + } + + for (int i = 0; i< batchSize; i++) { + final int batchIndex = selected[i]; + myagg.mergeNoNullCheck(countVector[batchIndex], sumVector[batchIndex]); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize, + boolean[] isNull) { + + for(int i = 0; i < batchSize; i++) { + if (!isNull[i]) { + myagg.merge(countVector[i], sumVector[i]); + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + long[] countVector, + HiveDecimalWritable[] sumVector, + int batchSize) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeSum.setFromLong(0L); + myagg.mergeCount = 0; + } + + for (int i = 0; i < batchSize; i++) { + myagg.mergeNoNullCheck(countVector[i], sumVector[i]); + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.reset(); + } + + @Override + public Object evaluateOutput( + AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + // !isSet checks for overflow. 
+ if (myagg.isNull || !myagg.mergeSum.isSet()) { + return null; + } + else { + Preconditions.checkState(myagg.mergeCount > 0); +#IF PARTIAL2 + resultCount.set (myagg.mergeCount); + resultSum.set(myagg.mergeSum); + return partialResult; +#ENDIF PARTIAL2 +#IF FINAL + tempDecWritable.setFromLong (myagg.mergeCount); + fullResult.set(myagg.mergeSum); + fullResult.mutateDivide(tempDecWritable); + fullResult.mutateEnforcePrecisionScale(sumPrecision, sumScale); + return fullResult; +#ENDIF FINAL + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { +#IF PARTIAL2 + return soi; +#ENDIF PARTIAL2 +#IF FINAL + return oi; +#ENDIF FINAL + } + + @Override + public long getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2() * 2, + model.memoryAlign()); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + init(); + + ExprNodeDesc inputExpr = desc.getParameters().get(0); + + StructTypeInfo partialStructTypeInfo = (StructTypeInfo) inputExpr.getTypeInfo(); + + ArrayList fieldNames = partialStructTypeInfo.getAllStructFieldNames(); + countOffset = fieldNames.indexOf("count"); + sumOffset = fieldNames.indexOf("sum"); + inputOffset = fieldNames.indexOf("input"); + + DecimalTypeInfo tiInput = (DecimalTypeInfo) partialStructTypeInfo.getAllStructFieldTypeInfos().get(sumOffset); + this.inputScale = tiInput.scale(); + this.inputPrecision = tiInput.precision(); + +#IF PARTIAL2 + initPartialResultInspector(); +#ENDIF PARTIAL2 +#IF FINAL + initFullResultInspector(); +#ENDIF FINAL + } +} + diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt new file mode 100644 index 0000000..996d0dc --- /dev/null +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgMerge.txt @@ -0,0 +1,547 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +import com.google.common.base.Preconditions; + +/** + * Generated from template VectorUDAFAvg.txt. + */ +@Description(name = "avg", + value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: )") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** class for storing the current aggregate value. 
*/
+  static class Aggregation implements AggregationBuffer {
+
+    private static final long serialVersionUID = 1L;
+
+    transient private long mergeCount;
+    transient private double mergeSum;
+
+    /**
+    * Value is explicitly (re)initialized in reset()
+    */
+    transient private boolean isNull = true;
+
+    public void merge(long count, double sum) {
+      if (isNull) {
+        mergeCount = count;
+        mergeSum = sum;
+        isNull = false;
+      } else {
+        mergeCount += count;
+        mergeSum += sum;
+      }
+    }
+
+    @Override
+    public int getVariableSize() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void reset () {
+      isNull = true;
+      mergeCount = 0L;
+      mergeSum = 0;
+    }
+  }
+
+#IF PARTIAL2
+  transient private Object[] partialResult;
+  transient private LongWritable resultCount;
+  transient private DoubleWritable resultSum;
+  transient private DoubleWritable resultInput;
+  transient private StructObjectInspector soi;
+#ENDIF PARTIAL2
+#IF FINAL
+  transient private DoubleWritable fullResult;
+
+  transient private ObjectInspector oi;
+#ENDIF FINAL
+
+  private transient int countOffset;
+  private transient int sumOffset;
+  private transient int inputOffset;
+
+  public <ClassName>(VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) {
+    super(inputExpression, mode);
+#IF PARTIAL2
+    Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL2);
+#ENDIF PARTIAL2
+#IF FINAL
+    Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.FINAL);
+#ENDIF FINAL
+  }
+
+  private void init() {
+#IF PARTIAL2
+    partialResult = new Object[3];
+    resultCount = new LongWritable();
+    resultSum = new DoubleWritable();
+    resultInput = new DoubleWritable();
+    partialResult[0] = resultCount;
+    partialResult[1] = resultSum;
+    partialResult[2] = resultInput;
+    initPartialResultInspector();
+#ENDIF PARTIAL2
+#IF FINAL
+    fullResult = new DoubleWritable();
+    initFullResultInspector();
+#ENDIF FINAL
+  }
+
+#IF PARTIAL2
+  private void initPartialResultInspector() {
+    List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+    foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+    foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+    foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+    List<String> fname = new ArrayList<String>();
+    fname.add("count");
+    fname.add("sum");
+    fname.add("input");
+    soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+  }
+#ENDIF PARTIAL2
+#IF FINAL
+  private void initFullResultInspector() {
+    oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+  }
+#ENDIF FINAL
+
+  private Aggregation getCurrentAggregationBuffer(
+      VectorAggregationBufferRow[] aggregationBufferSets,
+      int bufferIndex,
+      int row) {
+    VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+    Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex);
+    return myagg;
+  }
+
+  @Override
+  public void aggregateInputSelection(
+      VectorAggregationBufferRow[] aggregationBufferSets,
+      int bufferIndex,
+      VectorizedRowBatch batch) throws HiveException {
+
+    int batchSize = batch.size;
+
+    if (batchSize == 0) {
+      return;
+    }
+
+    inputExpression.evaluate(batch);
+
+    StructColumnVector inputStructColVector =
+        (StructColumnVector) batch.cols[this.inputExpression.getOutputColumn()];
+    ColumnVector[] fields = inputStructColVector.fields;
+
+    long[] countVector = ((LongColumnVector) fields[countOffset]).vector;
+    double[] sumVector = ((DoubleColumnVector) fields[sumOffset]).vector;
+
+    if (inputStructColVector.noNulls) {
+      if (inputStructColVector.isRepeating) {
iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batchSize); + } + } + } else { + if (inputStructColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], batchSize, batch.selected, inputStructColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], batchSize, inputStructColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batchSize, batch.selected, inputStructColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, batchSize, inputStructColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + double sum, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + final int batchIndex = selection[i]; + myagg.merge(countVector[batchIndex], sumVector[batchIndex]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[i], sumVector[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + double sum, + int batchSize, + int[] selection, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum); + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + double sum, + int batchSize, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i = 0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum); + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + int batchSize, 
+ int[] selection, + boolean[] isNull) { + + for (int i = 0; i < batchSize; i++) { + final int batchIndex = selection[i]; + if (!isNull[batchIndex]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[batchIndex], sumVector[batchIndex]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[i], sumVector[i]); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + StructColumnVector inputStructColVector = + (StructColumnVector) batch.cols[this.inputExpression.getOutputColumn()]; + ColumnVector[] fields = inputStructColVector.fields; + + long[] countVector = ((LongColumnVector) fields[countOffset]).vector; + double[] sumVector = ((DoubleColumnVector) fields[sumOffset]).vector; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputStructColVector.isRepeating) { + if (inputStructColVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeCount = 0; + myagg.mergeSum = 0; + } + myagg.mergeCount += countVector[0] * batchSize; + myagg.mergeSum += sumVector[0] * batchSize; + } + return; + } + + if (!batch.selectedInUse && inputStructColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, countVector, sumVector, batchSize); + } else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, countVector, sumVector, batchSize, inputStructColVector.isNull); + } else if (inputStructColVector.noNulls){ + iterateSelectionNoNulls(myagg, countVector, sumVector, batchSize, batch.selected); + } else { + iterateSelectionHasNulls(myagg, countVector, sumVector, batchSize, inputStructColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int i=0; i < batchSize; i++) { + int batchIndex = selected[i]; + if (!isNull[batchIndex]) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeCount = 0; + myagg.mergeSum = 0; + } + myagg.mergeCount += countVector[batchIndex]; + myagg.mergeSum += sumVector[batchIndex]; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeCount = 0; + myagg.mergeSum = 0; + } + + for (int i = 0; i< batchSize; ++i) { + final int batchIndex = selected[i]; + myagg.mergeCount += countVector[batchIndex]; + myagg.mergeSum += sumVector[batchIndex]; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + int batchSize, + boolean[] isNull) { + + for(int i = 0; i < batchSize; i++) { + if (!isNull[i]) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.mergeCount = 0; + myagg.mergeSum = 0; + } + myagg.mergeCount += countVector[i]; + myagg.mergeSum += sumVector[i]; + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + 
int batchSize) {
+    if (myagg.isNull) {
+      myagg.isNull = false;
+      myagg.mergeCount = 0;
+      myagg.mergeSum = 0;
+    }
+
+    for (int i=0;i fieldNames = partialStructTypeInfo.getAllStructFieldNames();
+    countOffset = fieldNames.indexOf("count");
+    sumOffset = fieldNames.indexOf("sum");
+    inputOffset = fieldNames.indexOf("input");
+  }
+}
\ No newline at end of file
diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt
new file mode 100644
index 0000000..b816a35
--- /dev/null
+++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFAvgTimestamp.txt
@@ -0,0 +1,517 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Generated from template VectorUDAFAvgTimestamp.txt.
+ */
+@Description(name = "avg",
+    value = "_FUNC_(expr) - Returns the average value of expr (vectorized, type: timestamp)")
+public class <ClassName> extends VectorAggregateExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  /** class for storing the current aggregate value. */
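Taken together, the merge-side hunks above reduce to a small amount of arithmetic. Here is a minimal, self-contained sketch of what the generated FINAL/PARTIAL2 AVG merge does with each (count, sum) partial; the class name is illustrative, and the real template reads the partials out of a StructColumnVector via the countOffset/sumOffset field indexes:

    // Illustrative only: folding AVG (count, sum) partials, as in Aggregation.merge() above.
    final class AvgPartialMerger {
      private long mergeCount;
      private double mergeSum;
      private boolean isNull = true;

      void merge(long partialCount, double partialSum) {
        if (isNull) {
          mergeCount = 0;
          mergeSum = 0;
          isNull = false;
        }
        mergeCount += partialCount;
        mergeSum += partialSum;
      }

      // FINAL mode divides once at output time; the result stays NULL until a partial arrives.
      Double finish() {
        return (isNull || mergeCount == 0) ? null : mergeSum / mergeCount;
      }
    }

Note also the isRepeating fast path in aggregateInput() above: when the whole batch carries one identical partial, count and sum are folded in with a single multiply by batchSize instead of a per-row loop.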
+  static class Aggregation implements AggregationBuffer {
+
+    private static final long serialVersionUID = 1L;
+
+    transient private double sum;
+    transient private long count;
+
+    /**
+     * Value is explicitly (re)initialized in reset()
+     */
+    transient private boolean isNull = true;
+
+    public void sumValue(double value) {
+      if (isNull) {
+        sum = value;
+        count = 1;
+        isNull = false;
+      } else {
+        sum += value;
+        count++;
+      }
+    }
+
+    @Override
+    public int getVariableSize() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public void reset() {
+      isNull = true;
+      sum = 0;
+      count = 0L;
+    }
+  }
+
+#IF PARTIAL1
+  transient private Object[] partialResult;
+  transient private LongWritable resultCount;
+  transient private DoubleWritable resultSum;
+  transient private TimestampWritable resultInput;
+  transient private StructObjectInspector soi;
+#ENDIF PARTIAL1
+#IF COMPLETE
+  transient private DoubleWritable fullResult;
+  transient private ObjectInspector oi;
+#ENDIF COMPLETE
+
+  public <ClassName>(VectorExpression inputExpression,
+      GenericUDAFEvaluator.Mode mode) {
+    super(inputExpression, mode);
+#IF PARTIAL1
+    Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL1);
+#ENDIF PARTIAL1
+#IF COMPLETE
+    Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.COMPLETE);
+#ENDIF COMPLETE
+  }
+
+  private void init() {
+#IF PARTIAL1
+    partialResult = new Object[3];
+    resultCount = new LongWritable();
+    resultSum = new DoubleWritable();
+    resultInput = new TimestampWritable();
+    partialResult[0] = resultCount;
+    partialResult[1] = resultSum;
+    partialResult[2] = resultInput;
+    initPartialResultInspector();
+#ENDIF PARTIAL1
+#IF COMPLETE
+    fullResult = new DoubleWritable();
+    initFullResultInspector();
+#ENDIF COMPLETE
+  }
+
+#IF PARTIAL1
+  private void initPartialResultInspector() {
+    List<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+    foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+    foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+    foi.add(PrimitiveObjectInspectorFactory.writableTimestampObjectInspector);
+    List<String> fname = new ArrayList<String>();
+    fname.add("count");
+    fname.add("sum");
+    fname.add("input");
+    soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+  }
+#ENDIF PARTIAL1
+#IF COMPLETE
+  private void initFullResultInspector() {
+    oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+  }
+#ENDIF COMPLETE
+
+  private Aggregation getCurrentAggregationBuffer(
+      VectorAggregationBufferRow[] aggregationBufferSets,
+      int bufferIndex,
+      int row) {
+    VectorAggregationBufferRow mySet = aggregationBufferSets[row];
+    Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex);
+    return myagg;
+  }
+
+  @Override
+  public void aggregateInputSelection(
+      VectorAggregationBufferRow[] aggregationBufferSets,
+      int bufferIndex,
+      VectorizedRowBatch batch) throws HiveException {
+
+    int batchSize = batch.size;
+
+    if (batchSize == 0) {
+      return;
+    }
+
+    inputExpression.evaluate(batch);
+
+    TimestampColumnVector inputColVector = (TimestampColumnVector)batch.
+ cols[this.inputExpression.getOutputColumn()]; + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getDouble(0), + batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize); + } + } + } else { + if (inputColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getDouble(0), batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector.getDouble(0), batchSize, inputColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize, batch.selected, inputColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + inputColVector, batchSize, inputColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue( + inputColVector.getDouble(selection[i])); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getDouble(i)); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[selection[i]]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + double value, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(value); + } + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for 
(int j=0; j < batchSize; ++j) { + int i = selection[j]; + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + j); + myagg.sumValue(inputColVector.getDouble(i)); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.sumValue(inputColVector.getDouble(i)); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = + (TimestampColumnVector)batch.cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls) { + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += inputColVector.getDouble(0)*batchSize; + myagg.count += batchSize; + } + return; + } + + if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int j=0; j< batchSize; ++j) { + int i = selected[j]; + if (!isNull[i]) { + double value = inputColVector.getDouble(i); + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + myagg.sum += value; + myagg.count += 1; + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + int[] selected) { + + if (myagg.isNull) { + myagg.isNull = false; + myagg.sum = 0; + myagg.count = 0; + } + + for (int i=0; i< batchSize; ++i) { + double value = inputColVector.getDouble(selected[i]); + myagg.sum += value; + myagg.count += 1; + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + TimestampColumnVector inputColVector, + int batchSize, + boolean[] isNull) { + + for(int i=0;i. Vectorized implementation for MIN/MAX aggregates. +* . Vectorized implementation for MIN/MAX aggregates. */ -@Description(name = "", +@Description(name = "", value = "") public class extends VectorAggregateExpression { - + private static final long serialVersionUID = 1L; - - /** + + /** * class for storing the current aggregate value. */ static private final class Aggregation implements AggregationBuffer { @@ -55,7 +56,7 @@ public class extends VectorAggregateExpression { */ transient private boolean isNull = true; - public void checkValue( value) { + public void minmaxValue( value) { if (isNull) { isNull = false; this.value = value; @@ -64,6 +65,13 @@ public class extends VectorAggregateExpression { } } + // The isNull check and work has already been performed. 
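The rename from checkValue() to minmaxValue(), and the minmaxValueNoCheck() variant defined next, separate the seeded and unseeded update paths so the no-selection/no-nulls loops can drop the per-row null test. A minimal sketch of the two paths, hard-wired here to MIN over long (the generated code substitutes the value type and the comparison operator for each aggregate):

    // Illustrative MIN buffer; MAX differs only in the comparison direction.
    final class LongMinBuffer {
      private long value;
      private boolean isNull = true;

      // Checked path: the first non-null value seeds the buffer.
      void minmaxValue(long v) {
        if (isNull) {
          isNull = false;
          value = v;
        } else if (v < value) {
          value = v;
        }
      }

      // Fast path: the caller has already seeded the buffer, so the null test is skipped.
      void minmaxValueNoCheck(long v) {
        if (v < value) {
          value = v;
        }
      }
    }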
+ public void minmaxValueNoCheck( value) { + if (value this.value) { + this.value = value; + } + } + @Override public int getVariableSize() { throw new UnsupportedOperationException(); @@ -75,31 +83,24 @@ public class extends VectorAggregateExpression { value = 0; } } - - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } private transient VectorExpressionWriter resultWriter; - - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; + + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); } - public () { - super(); + private void init() { } - + @Override public void init(AggregationDesc desc) throws HiveException { + init(); + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( desc.getParameters().get(0)); } - + private Aggregation getCurrentAggregationBuffer( VectorAggregationBufferRow[] aggregationBufferSets, int aggregrateIndex, @@ -108,21 +109,21 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregrateIndex); return myagg; } - + @Override public void aggregateInputSelection( VectorAggregationBufferRow[] aggregationBufferSets, - int aggregrateIndex, + int aggregrateIndex, VectorizedRowBatch batch) throws HiveException { - + int batchSize = batch.size; - + if (batchSize == 0) { return; } - + inputExpression.evaluate(batch); - + inputVector = ()batch. cols[this.inputExpression.getOutputColumn()]; [] vector = inputVector.vector; @@ -176,10 +177,10 @@ public class extends VectorAggregateExpression { for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregrateIndex, i); - myagg.checkValue(value); + myagg.minmaxValue(value); } } @@ -189,13 +190,13 @@ public class extends VectorAggregateExpression { [] values, int[] selection, int batchSize) { - + for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregrateIndex, i); - myagg.checkValue(values[selection[i]]); + myagg.minmaxValue(values[selection[i]]); } } @@ -206,10 +207,10 @@ public class extends VectorAggregateExpression { int batchSize) { for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregrateIndex, i); - myagg.checkValue(values[i]); + myagg.minmaxValue(values[i]); } } @@ -224,15 +225,15 @@ public class extends VectorAggregateExpression { if (isNull[0]) { return; } - + for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( aggregationBufferSets, aggregrateIndex, i); - myagg.checkValue(value); + myagg.minmaxValue(value); } - + } private void iterateHasNullsRepeatingWithAggregationSelection( @@ -251,7 +252,7 @@ public class extends VectorAggregateExpression { aggregationBufferSets, aggregrateIndex, i); - myagg.checkValue(value); + myagg.minmaxValue(value); } } @@ -267,10 +268,10 @@ public class extends VectorAggregateExpression { int i = selection[j]; if (!isNull[i]) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregrateIndex, j); - myagg.checkValue(values[i]); + myagg.minmaxValue(values[i]); } } } @@ -285,42 +286,40 @@ public class extends VectorAggregateExpression { for (int i=0; i < batchSize; ++i) { if (!isNull[i]) { Aggregation myagg = 
getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregrateIndex, i); - myagg.checkValue(values[i]); + myagg.minmaxValue(values[i]); } } - } - + } + @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + inputExpression.evaluate(batch); - + inputVector = ()batch. cols[this.inputExpression.getOutputColumn()]; - + int batchSize = batch.size; - + if (batchSize == 0) { return; } - + Aggregation myagg = (Aggregation)agg; - + [] vector = inputVector.vector; - + if (inputVector.isRepeating) { - if (inputVector.noNulls && - (myagg.isNull || (vector[0] myagg.value))) { - myagg.isNull = false; - myagg.value = vector[0]; + if (inputVector.noNulls) { + myagg.minmaxValue(vector[0]); } return; } - + if (!batch.selectedInUse && inputVector.noNulls) { iterateNoSelectionNoNulls(myagg, vector, batchSize); } @@ -334,82 +333,66 @@ public class extends VectorAggregateExpression { iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); } } - + private void iterateSelectionHasNulls( - Aggregation myagg, - [] vector, + Aggregation myagg, + [] vector, int batchSize, - boolean[] isNull, + boolean[] isNull, int[] selected) { - + for (int j=0; j< batchSize; ++j) { int i = selected[j]; if (!isNull[i]) { value = vector[i]; - if (myagg.isNull) { - myagg.isNull = false; - myagg.value = value; - } - else if (value myagg.value) { - myagg.value = value; - } + myagg.minmaxValue(value); } } } private void iterateSelectionNoNulls( - Aggregation myagg, - [] vector, - int batchSize, + Aggregation myagg, + [] vector, + int batchSize, int[] selected) { - + if (myagg.isNull) { myagg.value = vector[selected[0]]; myagg.isNull = false; } - + for (int i=0; i< batchSize; ++i) { value = vector[selected[i]]; - if (value myagg.value) { - myagg.value = value; - } + myagg.minmaxValueNoCheck(value); } } private void iterateNoSelectionHasNulls( - Aggregation myagg, - [] vector, + Aggregation myagg, + [] vector, int batchSize, boolean[] isNull) { - + for(int i=0;i value = vector[i]; - if (myagg.isNull) { - myagg.value = value; - myagg.isNull = false; - } - else if (value myagg.value) { - myagg.value = value; - } + myagg.minmaxValue(value); } } } private void iterateNoSelectionNoNulls( - Aggregation myagg, - [] vector, + Aggregation myagg, + [] vector, int batchSize) { if (myagg.isNull) { myagg.value = vector[0]; myagg.isNull = false; } - + for (int i=0;i value = vector[i]; - if (value myagg.value) { - myagg.value = value; - } + myagg.minmaxValueNoCheck(value); } } @@ -435,7 +418,7 @@ public class extends VectorAggregateExpression { return resultWriter.writeValue(myagg.value); } } - + @Override public ObjectInspector getOutputObjectInspector() { return resultWriter.getObjectInspector(); @@ -449,13 +432,4 @@ public class extends VectorAggregateExpression { model.primitive2(), model.memoryAlign()); } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; - } } - diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt index 58d2d22..6c024f7 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxDecimal.txt @@ -29,6 +29,7 
@@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -81,26 +82,19 @@ public class extends VectorAggregateExpression { } } - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - private transient VectorExpressionWriter resultWriter; - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); } - public () { - super(); + private void init() { } @Override public void init(AggregationDesc desc) throws HiveException { + init(); + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( desc.getParameters().get(0)); } @@ -335,14 +329,14 @@ public class extends VectorAggregateExpression { iterateNoSelectionNoNulls(myagg, vector, inputVector.scale, batchSize); } else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, vector, inputVector.scale, + iterateNoSelectionHasNulls(myagg, vector, inputVector.scale, batchSize, inputVector.isNull); } else if (inputVector.noNulls){ iterateSelectionNoNulls(myagg, vector, inputVector.scale, batchSize, batch.selected); } else { - iterateSelectionHasNulls(myagg, vector, inputVector.scale, + iterateSelectionHasNulls(myagg, vector, inputVector.scale, batchSize, inputVector.isNull, batch.selected); } } @@ -465,12 +459,4 @@ public class extends VectorAggregateExpression { model.primitive2(), model.memoryAlign()); } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; - } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt index 515692e..d12f231 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxIntervalDayTime.txt @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -80,26 +81,19 @@ public class extends VectorAggregateExpression { } } - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - private transient VectorExpressionWriter resultWriter; - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); } - public () { - super(); + private void 
init() { } @Override public void init(AggregationDesc desc) throws HiveException { + init(); + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( desc.getParameters().get(0)); } @@ -448,13 +442,5 @@ public class extends VectorAggregateExpression { model.primitive2(), model.memoryAlign()); } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; - } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt index c210e4c..d5eb712 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxString.txt @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -92,22 +93,13 @@ public class extends VectorAggregateExpression { } - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - transient private Text result; - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); } - public () { - super(); + private void init() { result = new Text(); } @@ -120,7 +112,7 @@ public class extends VectorAggregateExpression { return myagg; } -@Override + @Override public void aggregateInputSelection( VectorAggregationBufferRow[] aggregationBufferSets, int aggregrateIndex, @@ -404,15 +396,6 @@ public class extends VectorAggregateExpression { @Override public void init(AggregationDesc desc) throws HiveException { - // No-op - } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; + init(); } } - diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt index 074aefd..f78de56 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFMinMaxTimestamp.txt @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -82,26 +83,19 @@ public class extends VectorAggregateExpression { } } - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - private transient VectorExpressionWriter 
resultWriter; - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); } - public () { - super(); + private void init() { } @Override public void init(AggregationDesc desc) throws HiveException { + init(); + resultWriter = VectorExpressionWriterFactory.genVectorExpressionWritable( desc.getParameters().get(0)); } @@ -450,13 +444,5 @@ public class extends VectorAggregateExpression { model.primitive2(), model.memoryAlign()); } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; - } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt index a89ae0a..475d578 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFSum.txt @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -34,15 +35,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; /** -* . Vectorized implementation for SUM aggregates. +* . Vectorized implementation for SUM aggregates. */ @Description(name = "sum", value = "_FUNC_(expr) - Returns the sum value of expr (vectorized, type: )") public class extends VectorAggregateExpression { - + private static final long serialVersionUID = 1L; - - /** + + /** * class for storing the current aggregate value. */ private static final class Aggregation implements AggregationBuffer { @@ -55,7 +56,7 @@ public class extends VectorAggregateExpression { * Value is explicitly (re)initialized in reset() */ transient private boolean isNull = true; - + public void sumValue( value) { if (isNull) { sum = value; @@ -65,6 +66,11 @@ public class extends VectorAggregateExpression { } } + // The isNull check and work has already been performed. 
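SUM gets the same treatment as MIN/MAX: the constructor now takes the GenericUDAFEvaluator.Mode alongside the input expression, and a no-check fast path is added for loops that have already seeded the buffer. Distilled into a standalone sketch (long-typed here; the template substitutes the value type):

    // Illustrative SUM buffer mirroring sumValue()/sumValueNoCheck() in the hunks below.
    final class LongSumBuffer {
      private long sum;
      private boolean isNull = true;

      void sumValue(long v) {
        if (isNull) {
          sum = v;
          isNull = false;
        } else {
          sum += v;
        }
      }

      // Caller has already cleared isNull, so just accumulate.
      void sumValueNoCheck(long v) {
        sum += v;
      }
    }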
+ public void sumValueNoCheck( value) { + sum += value; + } + @Override public int getVariableSize() { throw new UnsupportedOperationException(); @@ -76,23 +82,14 @@ public class extends VectorAggregateExpression { sum = 0;; } } - - private VectorExpression inputExpression; - @Override - public VectorExpression inputExpression() { - return inputExpression; - } + transient private result; - transient private final result; - - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); } - public () { - super(); + private void init() { result = new (); } @@ -104,21 +101,21 @@ public class extends VectorAggregateExpression { Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); return myagg; } - + @Override public void aggregateInputSelection( VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, + int aggregateIndex, VectorizedRowBatch batch) throws HiveException { - + int batchSize = batch.size; - + if (batchSize == 0) { return; } - + inputExpression.evaluate(batch); - + inputVector = ()batch. cols[this.inputExpression.getOutputColumn()]; [] vector = inputVector.vector; @@ -172,12 +169,12 @@ public class extends VectorAggregateExpression { for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregateIndex, i); myagg.sumValue(value); } - } + } private void iterateNoNullsSelectionWithAggregationSelection( VectorAggregationBufferRow[] aggregationBufferSets, @@ -185,10 +182,10 @@ public class extends VectorAggregateExpression { [] values, int[] selection, int batchSize) { - + for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregateIndex, i); myagg.sumValue(values[selection[i]]); @@ -202,7 +199,7 @@ public class extends VectorAggregateExpression { int batchSize) { for (int i=0; i < batchSize; ++i) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregateIndex, i); myagg.sumValue(values[i]); @@ -228,7 +225,7 @@ public class extends VectorAggregateExpression { i); myagg.sumValue(value); } - + } private void iterateHasNullsRepeatingWithAggregationSelection( @@ -263,7 +260,7 @@ public class extends VectorAggregateExpression { int i = selection[j]; if (!isNull[i]) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregateIndex, j); myagg.sumValue(values[i]); @@ -281,45 +278,44 @@ public class extends VectorAggregateExpression { for (int i=0; i < batchSize; ++i) { if (!isNull[i]) { Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, + aggregationBufferSets, aggregateIndex, i); myagg.sumValue(values[i]); } } } - - + @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + inputExpression.evaluate(batch); - + inputVector = ()batch. 
cols[this.inputExpression.getOutputColumn()]; - + int batchSize = batch.size; - + if (batchSize == 0) { return; } - + Aggregation myagg = (Aggregation)agg; [] vector = inputVector.vector; - + if (inputVector.isRepeating) { if (inputVector.noNulls) { if (myagg.isNull) { myagg.isNull = false; myagg.sum = 0; } - myagg.sum += vector[0]*batchSize; + myagg.sumValueNoCheck(vector[0]*batchSize); } return; } - + if (!batch.selectedInUse && inputVector.noNulls) { iterateNoSelectionNoNulls(myagg, vector, batchSize); } @@ -333,14 +329,14 @@ public class extends VectorAggregateExpression { iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); } } - + private void iterateSelectionHasNulls( - Aggregation myagg, - [] vector, + Aggregation myagg, + [] vector, int batchSize, - boolean[] isNull, + boolean[] isNull, int[] selected) { - + for (int j=0; j< batchSize; ++j) { int i = selected[j]; if (!isNull[i]) { @@ -349,58 +345,58 @@ public class extends VectorAggregateExpression { myagg.isNull = false; myagg.sum = 0; } - myagg.sum += value; + myagg.sumValueNoCheck(value); } } } private void iterateSelectionNoNulls( - Aggregation myagg, - [] vector, - int batchSize, + Aggregation myagg, + [] vector, + int batchSize, int[] selected) { - + if (myagg.isNull) { myagg.sum = 0; myagg.isNull = false; } - + for (int i=0; i< batchSize; ++i) { value = vector[selected[i]]; - myagg.sum += value; + myagg.sumValueNoCheck(value); } } private void iterateNoSelectionHasNulls( - Aggregation myagg, - [] vector, + Aggregation myagg, + [] vector, int batchSize, boolean[] isNull) { - + for(int i=0;i value = vector[i]; - if (myagg.isNull) { + if (myagg.isNull) { myagg.sum = 0; myagg.isNull = false; } - myagg.sum += value; + myagg.sumValueNoCheck(value); } } } private void iterateNoSelectionNoNulls( - Aggregation myagg, - [] vector, + Aggregation myagg, + [] vector, int batchSize) { if (myagg.isNull) { myagg.sum = 0; myagg.isNull = false; } - + for (int i=0;i value = vector[i]; - myagg.sum += value; + myagg.sumValueNoCheck(value); } } @@ -426,7 +422,7 @@ public class extends VectorAggregateExpression { return result; } } - + @Override public ObjectInspector getOutputObjectInspector() { return ; @@ -442,15 +438,6 @@ public class extends VectorAggregateExpression { @Override public void init(AggregationDesc desc) throws HiveException { - // No-op - } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; + init(); } } - diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt index 1e3516b..390bd02 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVar.txt @@ -30,13 +30,17 @@ import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; 
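Every template in this patch specializes the same four-way walk over a VectorizedRowBatch: with or without the selection vector, and with or without nulls, plus the isRepeating shortcut handled before any loop (a single vector[0] * batchSize multiply stands in for the whole batch). Collapsed into one illustrative helper (hypothetical name, long-typed):

    // The four iterate*() specializations folded into one method for exposition.
    static long sumBatch(long[] vector, boolean[] isNull, int[] selected,
        boolean selectedInUse, boolean noNulls, int batchSize) {
      long sum = 0;
      if (selectedInUse) {
        for (int j = 0; j < batchSize; j++) {
          int i = selected[j];
          if (noNulls || !isNull[i]) {
            sum += vector[i];
          }
        }
      } else {
        for (int i = 0; i < batchSize; i++) {
          if (noNulls || !isNull[i]) {
            sum += vector[i];
          }
        }
      }
      return sum;
    }

The generated code unrolls this into four separate methods precisely so that the noNulls test is hoisted out of the inner loop.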
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import com.google.common.base.Preconditions; + /** * . Vectorized implementation for VARIANCE aggregates. */ @@ -69,6 +73,22 @@ public class extends VectorAggregateExpression { variance = 0; } + public void varianceValue(double value) { + if (isNull) { + sum = value; + count = 1; + variance = 0; + isNull = false; + } else { + sum += value; + count++; + if (count > 1) { + double t = count * value - sum; + variance += (t * t) / ((double) count * (count - 1)); + } + } + } + @Override public int getVariableSize() { throw new UnsupportedOperationException(); @@ -83,28 +103,32 @@ public class extends VectorAggregateExpression { } } - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - +#IF PARTIAL1 transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; transient private Object[] partialResult; transient private ObjectInspector soi; - - - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; +#ENDIF PARTIAL1 +#IF COMPLETE + transient private DoubleWritable fullResult; + + transient private ObjectInspector oi; +#ENDIF COMPLETE + + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); +#IF PARTIAL1 + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL1); +#ENDIF PARTIAL1 +#IF COMPLETE + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.COMPLETE); +#ENDIF COMPLETE } - public () { - super(); + private void init() { +#IF PARTIAL1 partialResult = new Object[3]; resultCount = new LongWritable(); resultSum = new DoubleWritable(); @@ -113,8 +137,14 @@ public class extends VectorAggregateExpression { partialResult[1] = resultSum; partialResult[2] = resultVariance; initPartialResultInspector(); +#ENDIF PARTIAL1 +#IF COMPLETE + fullResult = new DoubleWritable(); + initFullResultInspector(); +#ENDIF COMPLETE } +#IF PARTIAL1 private void initPartialResultInspector() { List foi = new ArrayList(); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); @@ -128,6 +158,12 @@ public class extends VectorAggregateExpression { soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); } +#ENDIF PARTIAL1 +#IF COMPLETE + private void initFullResultInspector() { + oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } +#ENDIF COMPLETE private Aggregation getCurrentAggregationBuffer( VectorAggregationBufferRow[] aggregationBufferSets, @@ -196,14 +232,9 @@ public class extends VectorAggregateExpression { aggregateIndex, i); if (myagg.isNull) { - myagg.init (); - } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); + myagg.init(); } + myagg.varianceValue(value); } } @@ -226,12 +257,7 @@ public class extends VectorAggregateExpression { if (myagg.isNull) { myagg.init (); } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); } } } @@ -252,12 +278,7 @@ public class extends VectorAggregateExpression { if (myagg.isNull) { myagg.init (); } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = 
myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); } } @@ -278,12 +299,7 @@ public class extends VectorAggregateExpression { if (myagg.isNull) { myagg.init (); } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); } } } @@ -302,19 +318,13 @@ public class extends VectorAggregateExpression { if (myagg.isNull) { myagg.init (); } - double value = vector[i]; - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(vector[i]); } } @Override public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { + throws HiveException { inputExpression.evaluate(batch); @@ -359,15 +369,7 @@ public class extends VectorAggregateExpression { myagg.init (); } - // TODO: conjure a formula w/o iterating - // - - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); // We pulled out i=0 so we can remove the count > 1 check in the loop for (int i=1; i extends VectorAggregateExpression { if (myagg.isNull) { myagg.init (); } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); } } } @@ -413,12 +410,7 @@ public class extends VectorAggregateExpression { } double value = vector[selected[0]]; - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); // i=0 was pulled out to remove the count > 1 check in the loop // @@ -443,12 +435,7 @@ public class extends VectorAggregateExpression { if (myagg.isNull) { myagg.init (); } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); } } } @@ -463,13 +450,7 @@ public class extends VectorAggregateExpression { } double value = vector[0]; - myagg.sum += value; - myagg.count += 1; - - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } + myagg.varianceValue(value); // i=0 was pulled out to remove count > 1 check for (int i=1; i extends VectorAggregateExpression { Aggregation myagg = (Aggregation) agg; if (myagg.isNull) { return null; - } - else { - assert(0 < myagg.count); + } else { +#IF PARTIAL1 resultCount.set (myagg.count); resultSum.set (myagg.sum); resultVariance.set (myagg.variance); return partialResult; +#ENDIF PARTIAL1 +#IF COMPLETE + if (myagg.count == 0) { + return null; // SQL standard - return null for zero elements + } else if (myagg.count > 1) { +#IF VARIANCE + fullResult.set(myagg.variance / (myagg.count)); +#ENDIF VARIANCE +#IF VARIANCE_SAMPLE + fullResult.set(myagg.variance / (myagg.count - 1)); +#ENDIF VARIANCE_SAMPLE +#IF STD + fullResult.set(Math.sqrt(myagg.variance / (myagg.count))); +#ENDIF STD +#IF STD_SAMPLE + fullResult.set(Math.sqrt(myagg.variance 
/ (myagg.count - 1))); +#ENDIF STD_SAMPLE + } else { + + // For one element the variance is always 0. + fullResult.set(0); + } + return fullResult; +#ENDIF COMPLETE } } + @Override - public ObjectInspector getOutputObjectInspector() { - return soi; - } + public ObjectInspector getOutputObjectInspector() { +#IF PARTIAL1 + return soi; +#ENDIF PARTIAL1 +#IF COMPLETE + return oi; +#ENDIF COMPLETE + } @Override public long getAggregationBufferFixedSize() { @@ -524,15 +534,7 @@ public class extends VectorAggregateExpression { @Override public void init(AggregationDesc desc) throws HiveException { - // No-op - } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; + init(); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt index b3ec7e9..ba246e2 100644 --- ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarDecimal.txt @@ -31,13 +31,17 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import com.google.common.base.Preconditions; + /** * . Vectorized implementation for VARIANCE aggregates. 
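The varianceValue() method that replaces the repeated inline blocks in these hunks is the standard one-pass update: once x_n has been added to the running sum S_n, the running sum of squared deviations grows by (n*x_n - S_n)^2 / (n*(n-1)). As a self-contained sketch:

    // One-pass variance accumulation, as factored into varianceValue() above.
    final class VarianceBuffer {
      private double sum;
      private long count;
      private double variance;   // running sum of squared deviations

      void varianceValue(double value) {
        sum += value;
        count++;
        if (count > 1) {
          double t = count * value - sum;
          variance += (t * t) / ((double) count * (count - 1));
        }
      }

      // Population variance; the sample variants divide by (count - 1) instead.
      double populationVariance() {
        return count > 1 ? variance / count : 0;
      }
    }

The COMPLETE branches added to evaluateOutput() then finish this value in place: divide by count (or count - 1 for the _SAMP variants), take the square root for the STD variants, return NULL for zero rows, and 0 for a single row.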
*/ @@ -68,9 +72,29 @@ public class extends VectorAggregateExpression { public void init() { isNull = false; - sum = 0f; + sum = 0.0; count = 0; - variance = 0f; + variance = 0.0; + } + + public void varianceValue(double value) { + if (isNull) { + sum = value; + count = 1; + variance = 0; + isNull = false; + } else { + sum += value; + count++; + if (count > 1) { + double t = count * value - sum; + variance += (t * t) / ((double) count * (count - 1)); + } + } + } + + public void varianceValue(HiveDecimalWritable value) { + varianceValue(value.doubleValue()); } @Override @@ -81,56 +105,39 @@ public class extends VectorAggregateExpression { @Override public void reset () { isNull = true; - sum = 0f; + sum = 0.0; count = 0; - variance = 0f; - } - - public void updateValueWithCheckAndInit(HiveDecimalWritable value, short scale) { - if (this.isNull) { - this.init(); - } - - double dval = value.getHiveDecimal().doubleValue(); - this.sum += dval; - this.count += 1; - if(this.count > 1) { - double t = this.count*dval - this.sum; - this.variance += (t*t) / ((double)this.count*(this.count-1)); - } - } - - public void updateValueNoCheck(HiveDecimalWritable value, short scale) { - double dval = value.getHiveDecimal().doubleValue(); - this.sum += dval; - this.count += 1; - double t = this.count*dval - this.sum; - this.variance += (t*t) / ((double)this.count*(this.count-1)); + variance = 0.0; } } - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - +#IF PARTIAL1 transient private LongWritable resultCount; transient private DoubleWritable resultSum; transient private DoubleWritable resultVariance; transient private Object[] partialResult; transient private ObjectInspector soi; - - public (VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; +#ENDIF PARTIAL1 +#IF COMPLETE + transient private DoubleWritable fullResult; + + transient private ObjectInspector oi; +#ENDIF COMPLETE + + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); +#IF PARTIAL1 + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL1); +#ENDIF PARTIAL1 +#IF COMPLETE + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.COMPLETE); +#ENDIF COMPLETE } - public () { - super(); + private void init() { +#IF PARTIAL1 partialResult = new Object[3]; resultCount = new LongWritable(); resultSum = new DoubleWritable(); @@ -139,8 +146,14 @@ public class extends VectorAggregateExpression { partialResult[1] = resultSum; partialResult[2] = resultVariance; initPartialResultInspector(); +#ENDIF PARTIAL1 +#IF COMPLETE + fullResult = new DoubleWritable(); + initFullResultInspector(); +#ENDIF COMPLETE } +#IF PARTIAL1 private void initPartialResultInspector() { List foi = new ArrayList(); foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); @@ -154,6 +167,12 @@ public class extends VectorAggregateExpression { soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); } +#ENDIF PARTIAL1 +#IF COMPLETE + private void initFullResultInspector() { + oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } +#ENDIF COMPLETE private Aggregation getCurrentAggregationBuffer( VectorAggregationBufferRow[] aggregationBufferSets, @@ -196,12 +215,12 @@ public class extends VectorAggregateExpression { } else if (!batch.selectedInUse) { iterateNoSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, 
vector, inputVector.scale, + aggregationBufferSets, aggregateIndex, vector, inputVector.scale, batchSize, inputVector.isNull); } else if (inputVector.noNulls){ iterateSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, vector, inputVector.scale, + aggregationBufferSets, aggregateIndex, vector, inputVector.scale, batchSize, batch.selected); } else { @@ -224,7 +243,7 @@ public class extends VectorAggregateExpression { aggregationBufferSets, aggregateIndex, i); - myagg.updateValueWithCheckAndInit(value, scale); + myagg.varianceValue(value); } } @@ -244,8 +263,7 @@ public class extends VectorAggregateExpression { j); int i = selected[j]; if (!isNull[i]) { - HiveDecimalWritable value = vector[i]; - myagg.updateValueWithCheckAndInit(value, scale); + myagg.varianceValue(vector[i]); } } } @@ -263,8 +281,7 @@ public class extends VectorAggregateExpression { aggregationBufferSets, aggregateIndex, i); - HiveDecimalWritable value = vector[selected[i]]; - myagg.updateValueWithCheckAndInit(value, scale); + myagg.varianceValue(vector[selected[i]]); } } @@ -282,8 +299,7 @@ public class extends VectorAggregateExpression { aggregationBufferSets, aggregateIndex, i); - HiveDecimalWritable value = vector[i]; - myagg.updateValueWithCheckAndInit(value, scale); + myagg.varianceValue(vector[i]); } } } @@ -300,8 +316,7 @@ public class extends VectorAggregateExpression { aggregationBufferSets, aggregateIndex, i); - HiveDecimalWritable value = vector[i]; - myagg.updateValueWithCheckAndInit(value, scale); + myagg.varianceValue(vector[i]); } } @@ -339,7 +354,7 @@ public class extends VectorAggregateExpression { iterateSelectionNoNulls(myagg, vector, inputVector.scale, batchSize, batch.selected); } else { - iterateSelectionHasNulls(myagg, vector, inputVector.scale, + iterateSelectionHasNulls(myagg, vector, inputVector.scale, batchSize, inputVector.isNull, batch.selected); } } @@ -350,14 +365,9 @@ public class extends VectorAggregateExpression { short scale, int batchSize) { - // TODO: conjure a formula w/o iterating - // - - myagg.updateValueWithCheckAndInit(value, scale); - - // We pulled out i=0 so we can remove the count > 1 check in the loop - for (int i=1; i extends VectorAggregateExpression { for (int j=0; j< batchSize; ++j) { int i = selected[j]; if (!isNull[i]) { - HiveDecimalWritable value = vector[i]; - myagg.updateValueWithCheckAndInit(value, scale); + myagg.varianceValue(vector[i]); } } } @@ -385,18 +394,8 @@ public class extends VectorAggregateExpression { int batchSize, int[] selected) { - if (myagg.isNull) { - myagg.init (); - } - - HiveDecimalWritable value = vector[selected[0]]; - myagg.updateValueWithCheckAndInit(value, scale); - - // i=0 was pulled out to remove the count > 1 check in the loop - // - for (int i=1; i< batchSize; ++i) { - value = vector[selected[i]]; - myagg.updateValueNoCheck(value, scale); + for (int i=0; i< batchSize; ++i) { + myagg.varianceValue(vector[selected[i]]); } } @@ -409,8 +408,7 @@ public class extends VectorAggregateExpression { for(int i=0;i extends VectorAggregateExpression { short scale, int batchSize) { - if (myagg.isNull) { - myagg.init (); - } - - HiveDecimalWritable value = vector[0]; - myagg.updateValueWithCheckAndInit(value, scale); - - // i=0 was pulled out to remove count > 1 check - for (int i=1; i extends VectorAggregateExpression { Aggregation myagg = (Aggregation) agg; if (myagg.isNull) { return null; - } - else { - assert(0 < myagg.count); - resultCount.set(myagg.count); - resultSum.set(myagg.sum); - 
resultVariance.set(myagg.variance); + } else { +#IF PARTIAL1 + resultCount.set (myagg.count); + resultSum.set (myagg.sum); + resultVariance.set (myagg.variance); return partialResult; +#ENDIF PARTIAL1 +#IF COMPLETE + if (myagg.count == 0) { + return null; // SQL standard - return null for zero elements + } else if (myagg.count > 1) { +#IF VARIANCE + fullResult.set(myagg.variance / (myagg.count)); +#ENDIF VARIANCE +#IF VARIANCE_SAMPLE + fullResult.set(myagg.variance / (myagg.count - 1)); +#ENDIF VARIANCE_SAMPLE +#IF STD + fullResult.set(Math.sqrt(myagg.variance / (myagg.count))); +#ENDIF STD +#IF STD_SAMPLE + fullResult.set(Math.sqrt(myagg.variance / (myagg.count - 1))); +#ENDIF STD_SAMPLE + } else { + + // For one element the variance is always 0. + fullResult.set(0); + } + + return fullResult; +#ENDIF COMPLETE } } + @Override - public ObjectInspector getOutputObjectInspector() { - return soi; - } + public ObjectInspector getOutputObjectInspector() { +#IF PARTIAL1 + return soi; +#ENDIF PARTIAL1 +#IF COMPLETE + return oi; +#ENDIF COMPLETE + } @Override public long getAggregationBufferFixedSize() { @@ -478,14 +497,6 @@ public class extends VectorAggregateExpression { @Override public void init(AggregationDesc desc) throws HiveException { - // No-op - } - - public VectorExpression getInputExpression() { - return inputExpression; - } - - public void setInputExpression(VectorExpression inputExpression) { - this.inputExpression = inputExpression; + init(); } } diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt new file mode 100644 index 0000000..447685b --- /dev/null +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarMerge.txt @@ -0,0 +1,573 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression;
+import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.StructColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.util.JavaDataModel;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * Generated from template VectorUDAFVarMerge.txt.
+ */
+@Description(name = "variance",
+    value = "_FUNC_(expr) - Returns the variance of expr (vectorized, type: )")
+public class <ClassName> extends VectorAggregateExpression {
+
+  private static final long serialVersionUID = 1L;
+
+  /** class for storing the current aggregate value. */
+  static class Aggregation implements AggregationBuffer {
+
+    private static final long serialVersionUID = 1L;
+
+    transient private long mergeCount;
+    transient private double mergeSum;
+    transient private double mergeVariance;
+
+    /**
+     * Value is explicitly (re)initialized in reset()
+     */
+    transient private boolean isNull = true;
+
+    public void merge(long partialCount, double partialSum, double partialVariance) {
+      final long origMergeCount;
+      if (isNull || mergeCount == 0) {
+
+        // Just copy the information since there is nothing so far.
+ origMergeCount = 0; + mergeCount = partialCount; + mergeSum = partialSum; + mergeVariance = partialVariance; + isNull = false; + } else { + origMergeCount = mergeCount; + } + + if (partialCount > 0 && origMergeCount > 0) { + + // Merge the two partials + + mergeCount += partialCount; + final double origMergeSum = mergeSum; + mergeSum += partialSum; + + final double doublePartialCount = (double) partialCount; + final double doubleOrigMergeCount = (double) origMergeCount; + double t = (doublePartialCount / doubleOrigMergeCount) * origMergeSum - partialSum; + mergeVariance += + partialVariance + ((doubleOrigMergeCount / doublePartialCount) / + (doubleOrigMergeCount + doublePartialCount)) * t * t; + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset () { + isNull = true; + mergeCount = 0L; + mergeSum = 0; + mergeVariance = 0; + } + } + +#IF PARTIAL2 + transient private Object[] partialResult; + transient private LongWritable resultCount; + transient private DoubleWritable resultSum; + transient private DoubleWritable resultVariance; + transient private StructObjectInspector soi; +#ENDIF PARTIAL2 +#IF FINAL + transient private DoubleWritable fullResult; + transient private ObjectInspector oi; +#ENDIF FINAL + + private transient int countOffset; + private transient int sumOffset; + private transient int varianceOffset; + + public (VectorExpression inputExpression, GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); +#IF PARTIAL2 + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL2); +#ENDIF PARTIAL2 +#IF FINAL + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.FINAL); +#ENDIF FINAL + } + + private void init() { +#IF PARTIAL2 + partialResult = new Object[3]; + resultCount = new LongWritable(); + resultSum = new DoubleWritable(); + resultVariance = new DoubleWritable(); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + partialResult[2] = resultVariance; + initPartialResultInspector(); +#ENDIF PARTIAL2 +#IF FINAL + fullResult = new DoubleWritable(); + initFullResultInspector(); +#ENDIF FINAL + } + +#IF PARTIAL2 + private void initPartialResultInspector() { + List foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } +#ENDIF PARTIAL2 +#IF FINAL + private void initFullResultInspector() { + oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } +#ENDIF FINAL + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); + return myagg; + } + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + VectorizedRowBatch batch) throws HiveException { + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + inputExpression.evaluate(batch); + + StructColumnVector inputStructColVector = + (StructColumnVector) batch.cols[this.inputExpression.getOutputColumn()]; + ColumnVector[] 
fields = inputStructColVector.fields; + + long[] countVector = ((LongColumnVector) fields[countOffset]).vector; + double[] sumVector = ((DoubleColumnVector) fields[sumOffset]).vector; + double[] varianceVector = ((DoubleColumnVector) fields[varianceOffset]).vector; + + if (inputStructColVector.noNulls) { + if (inputStructColVector.isRepeating) { + iterateNoNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], varianceVector[0], batchSize); + } else { + if (batch.selectedInUse) { + iterateNoNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, varianceVector, batch.selected, batchSize); + } else { + iterateNoNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, varianceVector, batchSize); + } + } + } else { + if (inputStructColVector.isRepeating) { + if (batch.selectedInUse) { + iterateHasNullsRepeatingSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], varianceVector[0], batchSize, batch.selected, inputStructColVector.isNull); + } else { + iterateHasNullsRepeatingWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector[0], sumVector[0], varianceVector[0], batchSize, inputStructColVector.isNull); + } + } else { + if (batch.selectedInUse) { + iterateHasNullsSelectionWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, varianceVector, batchSize, batch.selected, inputStructColVector.isNull); + } else { + iterateHasNullsWithAggregationSelection( + aggregationBufferSets, bufferIndex, + countVector, sumVector, varianceVector, batchSize, inputStructColVector.isNull); + } + } + } + } + + private void iterateNoNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + double sum, + double variance, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum, variance); + } + } + + private void iterateNoNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int[] selection, + int batchSize) { + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + final int batchIndex = selection[i]; + myagg.merge(countVector[batchIndex], sumVector[batchIndex], varianceVector[batchIndex]); + } + } + + private void iterateNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize) { + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[i], sumVector[i], varianceVector[i]); + } + } + + private void iterateHasNullsRepeatingSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + double sum, + double variance, + int batchSize, + int[] selection, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i=0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum, 
variance); + } + + } + + private void iterateHasNullsRepeatingWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long count, + double sum, + double variance, + int batchSize, + boolean[] isNull) { + + if (isNull[0]) { + return; + } + + for (int i = 0; i < batchSize; ++i) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(count, sum, variance); + } + } + + private void iterateHasNullsSelectionWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize, + int[] selection, + boolean[] isNull) { + + for (int i = 0; i < batchSize; i++) { + final int batchIndex = selection[i]; + if (!isNull[batchIndex]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[batchIndex], sumVector[batchIndex], varianceVector[batchIndex]); + } + } + } + + private void iterateHasNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int bufferIndex, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize, + boolean[] isNull) { + + for (int i=0; i < batchSize; ++i) { + if (!isNull[i]) { + Aggregation myagg = getCurrentAggregationBuffer( + aggregationBufferSets, + bufferIndex, + i); + myagg.merge(countVector[i], sumVector[i], varianceVector[i]); + } + } + } + + @Override + public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) + throws HiveException { + + inputExpression.evaluate(batch); + + StructColumnVector inputStructColVector = + (StructColumnVector) batch.cols[this.inputExpression.getOutputColumn()]; + ColumnVector[] fields = inputStructColVector.fields; + + long[] countVector = ((LongColumnVector) fields[countOffset]).vector; + double[] sumVector = ((DoubleColumnVector) fields[sumOffset]).vector; + double[] varianceVector = ((DoubleColumnVector) fields[varianceOffset]).vector; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + Aggregation myagg = (Aggregation)agg; + + if (inputStructColVector.isRepeating) { + if (inputStructColVector.noNulls) { + final long count = countVector[0]; + final double sum = sumVector[0]; + final double variance = varianceVector[0]; + for (int i = 0; i < batchSize; i++) { + myagg.merge(count, sum, variance); + } + } + return; + } + + if (!batch.selectedInUse && inputStructColVector.noNulls) { + iterateNoSelectionNoNulls(myagg, countVector, sumVector, varianceVector, batchSize); + } else if (!batch.selectedInUse) { + iterateNoSelectionHasNulls(myagg, countVector, sumVector, varianceVector, batchSize, inputStructColVector.isNull); + } else if (inputStructColVector.noNulls){ + iterateSelectionNoNulls(myagg, countVector, sumVector, varianceVector, batchSize, batch.selected); + } else { + iterateSelectionHasNulls(myagg, countVector, sumVector, varianceVector, batchSize, inputStructColVector.isNull, batch.selected); + } + } + + private void iterateSelectionHasNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize, + boolean[] isNull, + int[] selected) { + + for (int i=0; i < batchSize; i++) { + int batchIndex = selected[i]; + if (!isNull[batchIndex]) { + myagg.merge(countVector[batchIndex], sumVector[batchIndex], varianceVector[batchIndex]); + } + } + } + + private void iterateSelectionNoNulls( + Aggregation myagg, + 
long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize, + int[] selected) { + + for (int i = 0; i < batchSize; ++i) { + final int batchIndex = selected[i]; + myagg.merge(countVector[batchIndex], sumVector[batchIndex], varianceVector[batchIndex]); + } + } + + private void iterateNoSelectionHasNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize, + boolean[] isNull) { + + for(int i = 0; i < batchSize; i++) { + if (!isNull[i]) { + myagg.merge(countVector[i], sumVector[i], varianceVector[i]); + } + } + } + + private void iterateNoSelectionNoNulls( + Aggregation myagg, + long[] countVector, + double[] sumVector, + double[] varianceVector, + int batchSize) { + for (int i = 0; i < batchSize; i++) { + myagg.merge(countVector[i], sumVector[i], varianceVector[i]); + } + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new Aggregation(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + Aggregation myAgg = (Aggregation) agg; + myAgg.reset(); + } + + @Override + public Object evaluateOutput(AggregationBuffer agg) throws HiveException { + Aggregation myagg = (Aggregation) agg; + if (myagg.isNull) { + return null; + } else { +#IF PARTIAL2 + resultCount.set(myagg.mergeCount); + resultSum.set(myagg.mergeSum); + resultVariance.set(myagg.mergeVariance); + return partialResult; +#ENDIF PARTIAL2 +#IF FINAL + if (myagg.mergeCount == 0) { + return null; // SQL standard - return null for zero elements + } else if (myagg.mergeCount > 1) { +#IF VARIANCE + fullResult.set(myagg.mergeVariance / (myagg.mergeCount)); +#ENDIF VARIANCE +#IF VARIANCE_SAMPLE + fullResult.set(myagg.mergeVariance / (myagg.mergeCount - 1)); +#ENDIF VARIANCE_SAMPLE +#IF STD + fullResult.set(Math.sqrt(myagg.mergeVariance / (myagg.mergeCount))); +#ENDIF STD +#IF STD_SAMPLE + fullResult.set(Math.sqrt(myagg.mergeVariance / (myagg.mergeCount - 1))); +#ENDIF STD_SAMPLE + } else { + + // For one element the variance is always 0. + fullResult.set(0); + } + + return fullResult; +#ENDIF FINAL + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { +#IF PARTIAL2 + return soi; +#ENDIF PARTIAL2 +#IF FINAL + return oi; +#ENDIF FINAL + } + + @Override + public long getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2() * 2, + model.memoryAlign()); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + init(); + + ExprNodeDesc inputExpr = desc.getParameters().get(0); + StructTypeInfo partialStructTypeInfo = (StructTypeInfo) inputExpr.getTypeInfo(); + + ArrayList fieldNames = partialStructTypeInfo.getAllStructFieldNames(); + countOffset = fieldNames.indexOf("count"); + sumOffset = fieldNames.indexOf("sum"); + varianceOffset = fieldNames.indexOf("variance"); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt new file mode 100644 index 0000000..8ef1a9f --- /dev/null +++ ql/src/gen/vectorization/UDAFTemplates/VectorUDAFVarTimestamp.txt @@ -0,0 +1,477 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; +import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.AggregationDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +import com.google.common.base.Preconditions; + +/** + * VectorUDAFVarPopTimestamp. Vectorized implementation for VARIANCE aggregates. + */ +@Description(name = "variance, var_pop", + value = "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, timestamp)") +public class extends VectorAggregateExpression { + + private static final long serialVersionUID = 1L; + + /** + * class for storing the current aggregate value. + */ + private static final class Aggregation implements AggregationBuffer { + + private static final long serialVersionUID = 1L; + + transient private double sum; + transient private long count; + transient private double variance; + + /** + * Value is explicitly (re)initialized in reset() (despite the init() below...)
+ */ + transient private boolean isNull = true; + + public void init() { + isNull = false; + sum = 0; + count = 0; + variance = 0; + } + + public void varianceValue(double value) { + if (isNull) { + sum = value; + count = 1; + variance = 0; + isNull = false; + } else { + sum += value; + count++; + if (count > 1) { + double t = count * value - sum; + variance += (t * t) / ((double) count * (count - 1)); + } + } + } + + @Override + public int getVariableSize() { + throw new UnsupportedOperationException(); + } + + @Override + public void reset () { + isNull = true; + sum = 0; + count = 0; + variance = 0; + } + } + +#IF PARTIAL1 + transient private LongWritable resultCount; + transient private DoubleWritable resultSum; + transient private DoubleWritable resultVariance; + transient private Object[] partialResult; + + transient private ObjectInspector soi; +#ENDIF PARTIAL1 +#IF COMPLETE + transient private DoubleWritable fullResult; + + transient private ObjectInspector oi; +#ENDIF COMPLETE + + + public (VectorExpression inputExpression, + GenericUDAFEvaluator.Mode mode) { + super(inputExpression, mode); +#IF PARTIAL1 + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.PARTIAL1); +#ENDIF PARTIAL1 +#IF COMPLETE + Preconditions.checkState(this.mode == GenericUDAFEvaluator.Mode.COMPLETE); +#ENDIF COMPLETE + } + + private void init() { +#IF PARTIAL1 + partialResult = new Object[3]; + resultCount = new LongWritable(); + resultSum = new DoubleWritable(); + resultVariance = new DoubleWritable(); + partialResult[0] = resultCount; + partialResult[1] = resultSum; + partialResult[2] = resultVariance; + initPartialResultInspector(); +#ENDIF PARTIAL1 +#IF COMPLETE + fullResult = new DoubleWritable(); + initFullResultInspector(); +#ENDIF COMPLETE + } + +#IF PARTIAL1 + private void initPartialResultInspector() { + List foi = new ArrayList(); + foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); + + List fname = new ArrayList(); + fname.add("count"); + fname.add("sum"); + fname.add("variance"); + + soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); + } +#ENDIF PARTIAL1 +#IF COMPLETE + private void initFullResultInspector() { + oi = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; + } +#ENDIF COMPLETE + + private Aggregation getCurrentAggregationBuffer( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + int row) { + VectorAggregationBufferRow mySet = aggregationBufferSets[row]; + Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); + return myagg; + } + + + @Override + public void aggregateInputSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + VectorizedRowBatch batch) throws HiveException { + + inputExpression.evaluate(batch); + + TimestampColumnVector inputColVector = (TimestampColumnVector)batch. 
+ cols[this.inputExpression.getOutputColumn()]; + + int batchSize = batch.size; + + if (batchSize == 0) { + return; + } + + if (inputColVector.isRepeating) { + if (inputColVector.noNulls || !inputColVector.isNull[0]) { + iterateRepeatingNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector.getDouble(0), batchSize); + } + } + else if (!batch.selectedInUse && inputColVector.noNulls) { + iterateNoSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize); + } + else if (!batch.selectedInUse) { + iterateNoSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); + } + else if (inputColVector.noNulls){ + iterateSelectionNoNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); + } + else { + iterateSelectionHasNullsWithAggregationSelection( + aggregationBufferSets, aggregateIndex, inputColVector, batchSize, + inputColVector.isNull, batch.selected); + } + + } + + private void iterateRepeatingNoNullsWithAggregationSelection( + VectorAggregationBufferRow[] aggregationBufferSets, + int aggregateIndex, + double value, + int batchSize) { + + for (int i=0; i 1) { +#IF VARIANCE + fullResult.set(myagg.variance / (myagg.count)); +#ENDIF VARIANCE +#IF VARIANCE_SAMPLE + fullResult.set(myagg.variance / (myagg.count - 1)); +#ENDIF VARIANCE_SAMPLE +#IF STD + fullResult.set(Math.sqrt(myagg.variance / (myagg.count))); +#ENDIF STD +#IF STD_SAMPLE + fullResult.set(Math.sqrt(myagg.variance / (myagg.count - 1))); +#ENDIF STD_SAMPLE + } else { + + // For one element the variance is always 0. + fullResult.set(0); + } + + return fullResult; +#ENDIF COMPLETE + } + } + + @Override + public ObjectInspector getOutputObjectInspector() { +#IF PARTIAL1 + return soi; +#ENDIF PARTIAL1 +#IF COMPLETE + return oi; +#ENDIF COMPLETE + } + + @Override + public long getAggregationBufferFixedSize() { + JavaDataModel model = JavaDataModel.get(); + return JavaDataModel.alignUp( + model.object() + + model.primitive2()*3+ + model.primitive1(), + model.memoryAlign()); + } + + @Override + public void init(AggregationDesc desc) throws HiveException { + init(); + } +} + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index c70e1e0..a1cf76b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -3784,59 +3784,6 @@ public static StandardStructObjectInspector constructVectorizedReduceRowOI( return rowObjectInspector; } - /** - * Check if LLAP IO supports the column type that is being read - * @param conf - configuration - * @return false for types not supported by vectorization, true otherwise - */ - public static boolean checkVectorizerSupportedTypes(final Configuration conf) { - final String[] readColumnNames = ColumnProjectionUtils.getReadColumnNames(conf); - final String columnNames = conf.get(serdeConstants.LIST_COLUMNS); - final String columnTypes = conf.get(serdeConstants.LIST_COLUMN_TYPES); - if (columnNames == null || columnTypes == null || columnNames.isEmpty() || - columnTypes.isEmpty()) { - LOG.warn("Column names ({}) or types ({}) is null. 
Skipping type checking for LLAP IO.", - columnNames, columnTypes); - return true; - } - final List allColumnNames = Lists.newArrayList(columnNames.split(",")); - final List typeInfos = TypeInfoUtils.getTypeInfosFromTypeString(columnTypes); - final List allColumnTypes = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos); - return checkVectorizerSupportedTypes(Lists.newArrayList(readColumnNames), allColumnNames, - allColumnTypes); - } - - /** - * Check if LLAP IO supports the column type that is being read - * @param readColumnNames - columns that will be read from the table/partition - * @param allColumnNames - all columns - * @param allColumnTypes - all column types - * @return false for types not supported by vectorization, true otherwise - */ - public static boolean checkVectorizerSupportedTypes(final List readColumnNames, - final List allColumnNames, final List allColumnTypes) { - final String[] readColumnTypes = getReadColumnTypes(readColumnNames, allColumnNames, - allColumnTypes); - - if (readColumnTypes != null) { - for (String readColumnType : readColumnTypes) { - if (readColumnType != null) { - if (!Vectorizer.validateDataType(readColumnType, - VectorExpressionDescriptor.Mode.PROJECTION)) { - LOG.warn("Unsupported column type encountered ({}). Disabling LLAP IO.", - readColumnType); - return false; - } - } - } - } else { - LOG.warn("readColumnTypes is null. Skipping type checking for LLAP IO. " + - "readColumnNames: {} allColumnNames: {} allColumnTypes: {} readColumnTypes: {}", - readColumnNames, allColumnNames, allColumnTypes, readColumnTypes); - } - return true; - } - private static String[] getReadColumnTypes(final List readColumnNames, final List allColumnNames, final List allColumnTypes) { if (readColumnNames == null || allColumnNames == null || allColumnTypes == null || diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java index e473580..36158a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkReduceRecordHandler.java @@ -32,6 +32,9 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorDeserializeRow; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -45,6 +48,9 @@ import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -56,6 +62,8 @@ import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringUtils; +import 
com.google.common.base.Preconditions; + /** * Clone from ExecReducer, it is the bridge between the spark framework and * the Hive operator pipeline at execution time. It's main responsibilities are: @@ -85,20 +93,29 @@ private ObjectInspector[] rowObjectInspector; private boolean vectorized = false; - // runtime objects - private transient Object keyObject; - private transient BytesWritable groupKey; + private VectorDeserializeRow keyBinarySortableDeserializeToRow; + + private VectorDeserializeRow valueLazyBinaryDeserializeToRow; + + private VectorizedRowBatch batch; + private long batchBytes = 0; + private boolean handleGroupKey = true; // For now. private DataOutputBuffer buffer; - private VectorizedRowBatch[] batches; + // number of columns pertaining to keys in a vectorized row batch - private int keysColumnOffset; + private int firstValueColumnOffset; + private static final int BATCH_SIZE = VectorizedRowBatch.DEFAULT_SIZE; private static final int BATCH_BYTES = VectorizedRowBatch.DEFAULT_BYTES; + + // runtime objects + private transient Object keyObject; + private transient BytesWritable groupKey; + private StructObjectInspector keyStructInspector; - private StructObjectInspector[] valueStructInspectors; + private StructObjectInspector valueStructInspector; /* this is only used in the error code path */ - private List[] valueStringWriters; private MapredLocalWork localWork = null; @Override @@ -128,11 +145,14 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws if (vectorized) { final int maxTags = gWork.getTagToValueDesc().size(); + + // CONSIDER: Cleaning up this code and eliminating the arrays. Vectorization only handles + // one operator tree. + Preconditions.checkState(maxTags == 1); + keyStructInspector = (StructObjectInspector) keyObjectInspector; - batches = new VectorizedRowBatch[maxTags]; - valueStructInspectors = new StructObjectInspector[maxTags]; - valueStringWriters = new List[maxTags]; - keysColumnOffset = keyStructInspector.getAllStructFieldRefs().size(); + firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size(); + buffer = new DataOutputBuffer(); } @@ -149,20 +169,48 @@ public void init(JobConf job, OutputCollector output, Reporter reporter) throws if (vectorized) { /* vectorization only works with struct object inspectors */ - valueStructInspectors[tag] = (StructObjectInspector) valueObjectInspector[tag]; + valueStructInspector = (StructObjectInspector) valueObjectInspector[tag]; - final int totalColumns = keysColumnOffset - + valueStructInspectors[tag].getAllStructFieldRefs().size(); - valueStringWriters[tag] = new ArrayList(totalColumns); - valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory - .genVectorStructExpressionWritables(keyStructInspector))); - valueStringWriters[tag].addAll(Arrays.asList(VectorExpressionWriterFactory - .genVectorStructExpressionWritables(valueStructInspectors[tag]))); + final int totalColumns = firstValueColumnOffset + + valueStructInspector.getAllStructFieldRefs().size(); rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, - valueStructInspectors[tag]); - batches[tag] = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch(); - + valueStructInspector); + batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch(); + + // Setup vectorized deserialization for the key and value. 
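A note on the two deserializers set up below: shuffle keys arrive in the order-preserving BinarySortable encoding (hence the sort orders and null markers taken from the key SerDe), while values arrive LazyBinary-encoded, so each side gets its own VectorDeserializeRow. A condensed, hypothetical view of the per-record flow these fields enable (the actual calls appear in processVectorRow() further down):

    // key -> key columns of row 0; those columns are later marked repeating
    keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength);
    keyBinarySortableDeserializeToRow.deserialize(batch, 0);

    // value -> value columns [firstValueColumnOffset, batch.numCols) of the next free row
    valueLazyBinaryDeserializeToRow.setBytes(valueBytes, 0, valueLength);
    valueLazyBinaryDeserializeToRow.deserialize(batch, batch.size);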
+ BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer; + + keyBinarySortableDeserializeToRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + VectorizedBatchUtil.typeInfosFromStructObjectInspector( + keyStructInspector), + /* useExternalBuffer */ true, + binarySortableSerDe.getSortOrders(), + binarySortableSerDe.getNullMarkers(), + binarySortableSerDe.getNotNullMarkers())); + keyBinarySortableDeserializeToRow.init(0); + + final int valuesSize = valueStructInspector.getAllStructFieldRefs().size(); + if (valuesSize > 0) { + valueLazyBinaryDeserializeToRow = + new VectorDeserializeRow( + new LazyBinaryDeserializeRead( + VectorizedBatchUtil.typeInfosFromStructObjectInspector( + valueStructInspector), + /* useExternalBuffer */ true)); + valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset); + + // Create data buffers for value bytes column vectors. + for (int i = firstValueColumnOffset; i < batch.numCols; i++) { + ColumnVector colVector = batch.cols[i]; + if (colVector instanceof BytesColumnVector) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector; + bytesColumnVector.initBuffer(); + } + } + } } else { ois.add(keyObjectInspector); @@ -243,16 +291,27 @@ public void remove() { private DummyIterator dummyIterator = new DummyIterator(); /** - * Process one row using a dummy iterator. + * Process one row using a dummy iterator. Or, add row to vector batch. */ @Override public void processRow(Object key, final Object value) throws IOException { - dummyIterator.setValue(value); - processRow(key, dummyIterator); + if (vectorized) { + processVectorRow(key, value); + } else { + dummyIterator.setValue(value); + processRow(key, dummyIterator); + } } + + @Override public void processRow(Object key, Iterator values) throws IOException { + if (vectorized) { + processVectorRows(key, values); + return; + } + if (reducer.getDone()) { return; } @@ -295,12 +354,7 @@ public void processRow(Object key, final Object value) throws IOException { reducer.setGroupKeyObject(keyObject); reducer.startGroup(); } - /* this.keyObject passed via reference */ - if (vectorized) { - processVectors(values, tag); - } else { - processKeyValues(values, tag); - } + processKeyValues(values, tag); } catch (Throwable e) { abort = true; @@ -357,62 +411,152 @@ public void processRow(Object key, final Object value) throws IOException { return true; // give me more } - /** - * @param values - * @return true if it is not done and can take more inputs - */ - private boolean processVectors(Iterator values, byte tag) throws HiveException { - VectorizedRowBatch batch = batches[tag]; - batch.reset(); - buffer.reset(); - - /* deserialize key into columns */ - VectorizedBatchUtil.addRowToBatchFrom(keyObject, keyStructInspector, 0, 0, batch, buffer); - for (int i = 0; i < keysColumnOffset; i++) { - VectorizedBatchUtil.setRepeatingColumn(batch, i); + private void processVectorRows(Object key, Iterator values) throws IOException { + if (reducer.getDone()) { + return; } + while (values.hasNext()) { + processVectorRow(key, values.next()); + } + } + + private void processVectorRow(Object key, final Object value) throws IOException { + BytesWritable keyWritable = (BytesWritable) key; + BytesWritable valueWritable = (BytesWritable) value; - int rowIdx = 0; - int batchBytes = 0; try { - while (values.hasNext()) { - /* deserialize value into columns */ - BytesWritable valueWritable = (BytesWritable) values.next(); - Object valueObj = deserializeValue(valueWritable, tag); - - 
VectorizedBatchUtil.addRowToBatchFrom(valueObj, valueStructInspectors[tag], rowIdx, - keysColumnOffset, batch, buffer); - batchBytes += valueWritable.getLength(); - rowIdx++; - if (rowIdx >= BATCH_SIZE || batchBytes > BATCH_BYTES) { - VectorizedBatchUtil.setBatchSize(batch, rowIdx); - reducer.process(batch, tag); - rowIdx = 0; - batchBytes = 0; - if (LOG.isInfoEnabled()) { - logMemoryInfo(); + + if (handleGroupKey) { + final boolean isKeyChange; + if (groupKey == null) { + + // The first group. + isKeyChange = true; + groupKey = new BytesWritable(); + } else { + isKeyChange = !keyWritable.equals(groupKey); + } + + if (isKeyChange) { + + // Flush current group batch as last batch of group. + if (batch.size > 0) { + + // Forward; reset key and value columns. + forwardBatch(/* resetValueColumnsOnly */ false); + reducer.endGroup(); + } + + reducer.startGroup(); + + // Deserialize group key into vector row columns. + byte[] keyBytes = keyWritable.getBytes(); + int keyLength = keyWritable.getLength(); + + groupKey.set(keyBytes, 0, keyLength); + + keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength); + try { + keyBinarySortableDeserializeToRow.deserialize(batch, 0); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead details: " + + keyBinarySortableDeserializeToRow.getDetailedReadPositionString(), + e); + } + + // And, mark group keys as repeating. + for(int i = 0; i < firstValueColumnOffset; i++) { + VectorizedBatchUtil.setRepeatingColumn(batch, i); } } + + // Can we add to current batch? + if (batch.size >= batch.getMaxSize() || + batch.size > 0 && batchBytes >= BATCH_BYTES) { + + // Batch is full or using too much space. + forwardBatch(/* resetValueColumnsOnly */ true); + } + + if (valueLazyBinaryDeserializeToRow != null) { + // Deserialize value into vector row columns. + byte[] valueBytes = valueWritable.getBytes(); + int valueLength = valueWritable.getLength(); + batchBytes += valueLength; + + valueLazyBinaryDeserializeToRow.setBytes(valueBytes, 0, valueLength); + valueLazyBinaryDeserializeToRow.deserialize(batch, batch.size); + } + batch.size++; + } else { + + // No group key. + + // Can we add to current batch? + if (batch.size >= batch.getMaxSize() || + batch.size > 0 && batchBytes >= BATCH_BYTES) { + + // Batch is full or using too much space. + forwardBatch(/* resetValueColumnsOnly */ false); + } + + // Deserialize key into vector row columns. + byte[] keyBytes = keyWritable.getBytes(); + int keyLength = keyWritable.getLength(); + + keyBinarySortableDeserializeToRow.setBytes(keyBytes, 0, keyLength); + try { + keyBinarySortableDeserializeToRow.deserialize(batch, 0); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead details: " + + keyBinarySortableDeserializeToRow.getDetailedReadPositionString(), + e); + } + + if (valueLazyBinaryDeserializeToRow != null) { + // Deserialize value into vector row columns. 
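For orientation, the grouped path earlier in this method reduces to the skeleton below (a hedged paraphrase of the code above, not new behavior); the no-group-key branch here is the same minus the group bookkeeping, flushing a full batch with forwardBatch(false) and deserializing the key for every record:

    // key change  -> forwardBatch(false); endGroup(); startGroup();
    //                deserialize key into row 0; mark key columns repeating
    // batch full  -> forwardBatch(true)   // keep repeating key columns,
    //                                     // reset only value columns and buffers
    // every row   -> deserialize value into row batch.size; batch.size++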
+ byte[] valueBytes = valueWritable.getBytes(); + int valueLength = valueWritable.getLength(); + + batchBytes += valueLength; + + valueLazyBinaryDeserializeToRow.setBytes(valueBytes, 0, valueLength); + valueLazyBinaryDeserializeToRow.deserialize(batch, batch.size); + } + batch.size++; } - if (rowIdx > 0) { - VectorizedBatchUtil.setBatchSize(batch, rowIdx); - reducer.process(batch, tag); - } - if (LOG.isInfoEnabled()) { - logMemoryInfo(); + } catch (Throwable e) { + abort = true; + if (e instanceof OutOfMemoryError) { + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; + } else { + throw new RuntimeException(e); } - } catch (Exception e) { - String rowString = null; - try { - rowString = batch.toString(); - } catch (Exception e2) { - rowString = "[Error getting row data with exception " + StringUtils.stringifyException(e2) - + " ]"; + } + } + + private void forwardBatch(boolean resetValueColumnsOnly) throws HiveException { + reducer.process(batch, 0); + + if (resetValueColumnsOnly) { + // Reset just the value columns and value buffer. + for (int i = firstValueColumnOffset; i < batch.numCols; i++) { + // Note that reset also resets the data buffer for bytes column vectors. + batch.cols[i].reset(); } - throw new HiveException("Error while processing vector batch (tag=" + tag + ") " - + rowString, e); + batch.size = 0; + } else { + // Reset key and value columns; and batch.size + batch.reset(); + } + + batchBytes = 0; + if (LOG.isInfoEnabled()) { + logMemoryInfo(); } - return true; // give me more } private Object deserializeValue(BytesWritable valueWritable, byte tag) throws HiveException { @@ -435,10 +579,19 @@ public void close() { } try { - if (groupKey != null) { - // If a operator wants to do some work at the end of a group - LOG.trace("End Group"); - reducer.endGroup(); + if (vectorized) { + if (batch.size > 0) { + forwardBatch(/* resetValueColumnsOnly */ false); + if (handleGroupKey) { + reducer.endGroup(); + } + } + } else { + if (groupKey != null) { + // If a operator wants to do some work at the end of a group + LOG.trace("End Group"); + reducer.endGroup(); + } } if (LOG.isInfoEnabled()) { logCloseInfo(); @@ -472,4 +625,12 @@ public void close() { public boolean getDone() { return reducer.getDone(); } + + public static String displayBytes(byte[] bytes, int start, int length) { + StringBuilder sb = new StringBuilder(); + for (int i = start; i < start + length; i++) { + sb.append(String.format("\\%03d", (int) (bytes[i] & 0xff))); + } + return sb.toString(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java index 60660ac..43f9db3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java @@ -108,9 +108,6 @@ private StructObjectInspector keyStructInspector; private StructObjectInspector valueStructInspectors; - /* this is only used in the error code path */ - private List valueStringWriters; - private KeyValuesAdapter reader; private boolean handleGroupKey; @@ -171,13 +168,6 @@ void init(JobConf jconf, Operator reducer, boolean vectorized, TableDesc keyT final int totalColumns = firstValueColumnOffset + valueStructInspectors.getAllStructFieldRefs().size(); - valueStringWriters = new ArrayList(totalColumns); - valueStringWriters.addAll(Arrays - .asList(VectorExpressionWriterFactory - .genVectorStructExpressionWritables(keyStructInspector))); - 
valueStringWriters.addAll(Arrays - .asList(VectorExpressionWriterFactory - .genVectorStructExpressionWritables(valueStructInspectors))); rowObjectInspector = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspectors); @@ -449,9 +439,6 @@ private void processVectorGroup(BytesWritable keyWritable, int valueLength = valueWritable.getLength(); batchBytes += valueLength; - // l4j.info("ReduceRecordSource processVectorGroup valueBytes " + valueLength + " " + - // VectorizedBatchUtil.displayBytes(valueBytes, 0, valueLength)); - valueLazyBinaryDeserializeToRow.setBytes(valueBytes, 0, valueLength); valueLazyBinaryDeserializeToRow.deserialize(batch, rowIdx); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index b0d1c75..c7aa93e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -248,6 +248,15 @@ public void init(List typeNames) throws HiveException { } } + /* + * Initialize using one target data type info. + */ + public void init(TypeInfo typeInfo, int outputColumnNum) throws HiveException { + + allocateArrays(1); + initTargetEntry(0, outputColumnNum, typeInfo); + } + /** * Initialize for conversion from a provided (source) data types to the target data types * desired in the VectorizedRowBatch. @@ -365,53 +374,111 @@ private void assignRowColumn( VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; case BOOLEAN: - ((LongColumnVector) columnVector).vector[batchIndex] = - (((BooleanWritable) object).get() ? 1 : 0); + if (object instanceof Boolean) { + ((LongColumnVector) columnVector).vector[batchIndex] = + (((Boolean) object) ? 1 : 0); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + (((BooleanWritable) object).get() ? 
1 : 0); + } break; case BYTE: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((ByteWritable) object).get(); + if (object instanceof Byte) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Byte) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((ByteWritable) object).get(); + } break; case SHORT: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((ShortWritable) object).get(); + if (object instanceof Short) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Short) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((ShortWritable) object).get(); + } break; case INT: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((IntWritable) object).get(); + if (object instanceof Integer) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Integer) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((IntWritable) object).get(); + } break; case LONG: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((LongWritable) object).get(); + if (object instanceof Long) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Long) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((LongWritable) object).get(); + } break; case TIMESTAMP: - ((TimestampColumnVector) columnVector).set( - batchIndex, ((TimestampWritable) object).getTimestamp()); + if (object instanceof Timestamp) { + ((TimestampColumnVector) columnVector).set( + batchIndex, ((Timestamp) object)); + } else { + ((TimestampColumnVector) columnVector).set( + batchIndex, ((TimestampWritable) object).getTimestamp()); + } break; case DATE: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((DateWritable) object).getDays(); + if (object instanceof Date) { + ((LongColumnVector) columnVector).vector[batchIndex] = + DateWritable.dateToDays((Date) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((DateWritable) object).getDays(); + } break; case FLOAT: - ((DoubleColumnVector) columnVector).vector[batchIndex] = - ((FloatWritable) object).get(); + if (object instanceof Float) { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((Float) object); + } else { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((FloatWritable) object).get(); + } break; case DOUBLE: - ((DoubleColumnVector) columnVector).vector[batchIndex] = - ((DoubleWritable) object).get(); + if (object instanceof Double) { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((Double) object); + } else { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((DoubleWritable) object).get(); + } break; case BINARY: { - BytesWritable bw = (BytesWritable) object; - ((BytesColumnVector) columnVector).setVal( - batchIndex, bw.getBytes(), 0, bw.getLength()); + if (object instanceof byte[]) { + byte[] bytes = (byte[]) object; + ((BytesColumnVector) columnVector).setVal( + batchIndex, bytes, 0, bytes.length); + } else { + BytesWritable bw = (BytesWritable) object; + ((BytesColumnVector) columnVector).setVal( + batchIndex, bw.getBytes(), 0, bw.getLength()); + } } break; case STRING: { - Text tw = (Text) object; - ((BytesColumnVector) columnVector).setVal( - batchIndex, tw.getBytes(), 0, tw.getLength()); + if (object instanceof String) { + String string = (String) object; + byte[] bytes = string.getBytes(); + ((BytesColumnVector) columnVector).setVal( + batchIndex, bytes, 0, bytes.length); + } else { + Text tw = (Text) object; + 
((BytesColumnVector) columnVector).setVal( + batchIndex, tw.getBytes(), 0, tw.getLength()); + } } break; case VARCHAR: @@ -463,12 +530,22 @@ private void assignRowColumn( } break; case INTERVAL_YEAR_MONTH: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths(); + if (object instanceof HiveIntervalYearMonth) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((HiveIntervalYearMonth) object).getTotalMonths(); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths(); + } break; case INTERVAL_DAY_TIME: - ((IntervalDayTimeColumnVector) columnVector).set( - batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime()); + if (object instanceof HiveIntervalDayTime) { + ((IntervalDayTimeColumnVector) columnVector).set( + batchIndex, (HiveIntervalDayTime) object); + } else { + ((IntervalDayTimeColumnVector) columnVector).set( + batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime()); + } break; default: throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 30916a0..642dd46 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -947,7 +947,9 @@ protected void initializeOp(Configuration hconf) throws HiveException { for (int i = 0; i < aggregators.length; ++i) { aggregators[i].init(conf.getAggregators().get(i)); - objectInspectors.add(aggregators[i].getOutputObjectInspector()); + ObjectInspector objInsp = aggregators[i].getOutputObjectInspector(); + Preconditions.checkState(objInsp != null); + objectInspectors.add(objInsp); } keyWrappersBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 7dc4c81..503bd0c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -52,20 +52,25 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor.InputExpressionType; import org.apache.hadoop.hive.ql.exec.vector.expressions.*; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFAvgTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilter; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFBloomFilterMerge; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCount; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountMerge; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFCountStar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFStdPopTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFStdSampTimestamp; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumDecimal; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFVarPopTimestamp; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFVarSampTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorUDAFSumTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalFinal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDecimalPartial2; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgDoubleComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgFinal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgLongComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgPartial2; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFAvgTimestampComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDecimal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMaxLong; @@ -77,19 +82,44 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinString; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFMinTimestamp; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDecimalComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopDoubleComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopFinal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopLongComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdPopTimestampComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDecimalComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampDoubleComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampFinal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampLong; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampLongComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFStdSampTimestampComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFSumLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPartial2; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDecimalComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopDoubleComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopFinal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopLongComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarPopTimestampComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDecimal; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDecimalComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDouble; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampDoubleComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampFinal; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLong; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampLongComplete; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampTimestamp; +import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.VectorUDAFVarSampTimestampComplete; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor; import org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc; @@ -2643,12 +2673,19 @@ private Timestamp evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveExceptio } static String getScratchName(TypeInfo typeInfo) throws HiveException { + // For now, leave DECIMAL precision/scale in the name so DecimalColumnVector scratch columns // don't need their precision/scale adjusted... if (typeInfo.getCategory() == Category.PRIMITIVE && ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory() == PrimitiveCategory.DECIMAL) { return typeInfo.getTypeName(); } + + // And, for Complex Types, also leave the children types in place... 
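To make the scratch-name rule below concrete (an illustrative mapping following the comments, not an exhaustive list): primitives collapse to their ColumnVector type name, while DECIMAL and complex types keep their full type string so a correctly shaped ColumnVector, children included, can be materialized later:

    bigint                           -> "long"
    decimal(10,2)                    -> "decimal(10,2)"
    struct<count:bigint,sum:double>  -> "struct<count:bigint,sum:double>"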
+ if (typeInfo.getCategory() != Category.PRIMITIVE) { + return typeInfo.getTypeName(); + } + Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); return columnVectorType.name().toLowerCase(); } @@ -2819,6 +2856,13 @@ public static String mapTypeNameSynonyms(String typeName) { add(new AggregateDefinition("count", ArgumentType.INT_FAMILY, Mode.PARTIAL2, VectorUDAFCountMerge.class)); add(new AggregateDefinition("count", ArgumentType.INT_FAMILY, Mode.FINAL, VectorUDAFCountMerge.class)); + // TIMESTAMP SUM takes a TimestampColumnVector as input for PARTIAL1 and COMPLETE. + // But the output is a double. + add(new AggregateDefinition("sum", ArgumentType.TIMESTAMP, Mode.PARTIAL1, VectorUDAFSumTimestamp.class)); + add(new AggregateDefinition("sum", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFSumTimestamp.class)); + add(new AggregateDefinition("sum", ArgumentType.TIMESTAMP, Mode.PARTIAL2, VectorUDAFSumDouble.class)); + add(new AggregateDefinition("sum", ArgumentType.TIMESTAMP, Mode.FINAL, VectorUDAFSumDouble.class)); + // Since the partial aggregation produced by AVG is a STRUCT with count and sum and the // STRUCT data type isn't vectorized yet, we currently only support PARTIAL1. When we do // support STRUCTs for average partial aggregation, we'll need 4 variations: @@ -2828,12 +2872,29 @@ public static String mapTypeNameSynonyms(String typeName) { // FINAL STRUCT Average Partial Aggregation --> Full Aggregation // COMPLETE Original data --> Full Aggregation // + // NOTE: Since we do average of timestamps internally as double, we do not need a VectorUDAFAvgTimestampPartial2. + // add(new AggregateDefinition("avg", ArgumentType.INT_FAMILY, Mode.PARTIAL1, VectorUDAFAvgLong.class)); add(new AggregateDefinition("avg", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL1, VectorUDAFAvgDouble.class)); add(new AggregateDefinition("avg", ArgumentType.DECIMAL, Mode.PARTIAL1, VectorUDAFAvgDecimal.class)); add(new AggregateDefinition("avg", ArgumentType.TIMESTAMP, Mode.PARTIAL1, VectorUDAFAvgTimestamp.class)); - // We haven't had a chance to examine the VAR* and STD* area and expand it beyond PARTIAL1. + // (PARTIAL2 FLOAT_FAMILY covers INT_FAMILY and TIMESTAMP because it is: + // STRUCT Average Partial Aggregation --> STRUCT Average Partial Aggregation + add(new AggregateDefinition("avg", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFAvgPartial2.class)); + add(new AggregateDefinition("avg", ArgumentType.DECIMAL, Mode.PARTIAL2, VectorUDAFAvgDecimalPartial2.class)); + + // (FINAL FLOAT_FAMILY covers INT_FAMILY and TIMESTAMP) + add(new AggregateDefinition("avg", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFAvgFinal.class)); + add(new AggregateDefinition("avg", ArgumentType.DECIMAL, Mode.FINAL, VectorUDAFAvgDecimalFinal.class)); + add(new AggregateDefinition("avg", ArgumentType.TIMESTAMP, Mode.FINAL, VectorUDAFAvgFinal.class)); + + add(new AggregateDefinition("avg", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFAvgLongComplete.class)); + add(new AggregateDefinition("avg", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFAvgDoubleComplete.class)); + add(new AggregateDefinition("avg", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFAvgDecimalComplete.class)); + add(new AggregateDefinition("avg", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFAvgTimestampComplete.class)); + + // We haven't had a chance to examine the VAR* and STD* area and expand it beyond PARTIAL1 and COMPLETE. 
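For orientation, the four GenericUDAFEvaluator modes these AVG and VAR*/STD* definitions cover, restating the comment block above with AVG as the example:

    PARTIAL1: original rows   -> struct partial (count, sum, input)   // map side
    PARTIAL2: struct partial  -> struct partial                       // intermediate merge
    FINAL:    struct partial  -> full result                          // reduce side
    COMPLETE: original rows   -> full result                          // single stage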
add(new AggregateDefinition("variance", ArgumentType.INT_FAMILY, Mode.PARTIAL1, VectorUDAFVarPopLong.class)); add(new AggregateDefinition("var_pop", ArgumentType.INT_FAMILY, Mode.PARTIAL1, VectorUDAFVarPopLong.class)); add(new AggregateDefinition("variance", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL1, VectorUDAFVarPopDouble.class)); @@ -2863,6 +2924,52 @@ public static String mapTypeNameSynonyms(String typeName) { add(new AggregateDefinition("stddev_samp", ArgumentType.DECIMAL, Mode.PARTIAL1, VectorUDAFStdSampDecimal.class)); add(new AggregateDefinition("stddev_samp", ArgumentType.TIMESTAMP, Mode.PARTIAL1, VectorUDAFStdSampTimestamp.class)); + add(new AggregateDefinition("variance", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFVarPopLongComplete.class)); + add(new AggregateDefinition("var_pop", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFVarPopLongComplete.class)); + add(new AggregateDefinition("variance", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFVarPopDoubleComplete.class)); + add(new AggregateDefinition("var_pop", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFVarPopDoubleComplete.class)); + add(new AggregateDefinition("variance", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFVarPopDecimalComplete.class)); + add(new AggregateDefinition("var_pop", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFVarPopDecimalComplete.class)); + add(new AggregateDefinition("variance", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFVarPopTimestampComplete.class)); + add(new AggregateDefinition("var_pop", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFVarPopTimestampComplete.class)); + add(new AggregateDefinition("var_samp", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFVarSampLongComplete.class)); + add(new AggregateDefinition("var_samp" , ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFVarSampDoubleComplete.class)); + add(new AggregateDefinition("var_samp" , ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFVarSampDecimalComplete.class)); + add(new AggregateDefinition("var_samp" , ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFVarSampTimestampComplete.class)); + add(new AggregateDefinition("std", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFStdPopLongComplete.class)); + add(new AggregateDefinition("stddev", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFStdPopLongComplete.class)); + add(new AggregateDefinition("stddev_pop", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFStdPopLongComplete.class)); + add(new AggregateDefinition("std", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFStdPopDoubleComplete.class)); + add(new AggregateDefinition("stddev", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFStdPopDoubleComplete.class)); + add(new AggregateDefinition("stddev_pop", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFStdPopDoubleComplete.class)); + add(new AggregateDefinition("std", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFStdPopDecimalComplete.class)); + add(new AggregateDefinition("stddev", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFStdPopDecimalComplete.class)); + add(new AggregateDefinition("stddev_pop", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFStdPopDecimalComplete.class)); + add(new AggregateDefinition("std", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFStdPopTimestampComplete.class)); + add(new AggregateDefinition("stddev", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFStdPopTimestampComplete.class)); + add(new AggregateDefinition("stddev_pop", ArgumentType.TIMESTAMP, Mode.COMPLETE, 
VectorUDAFStdPopTimestampComplete.class));
+ add(new AggregateDefinition("stddev_samp", ArgumentType.INT_FAMILY, Mode.COMPLETE, VectorUDAFStdSampLongComplete.class));
+ add(new AggregateDefinition("stddev_samp", ArgumentType.FLOAT_FAMILY, Mode.COMPLETE, VectorUDAFStdSampDoubleComplete.class));
+ add(new AggregateDefinition("stddev_samp", ArgumentType.DECIMAL, Mode.COMPLETE, VectorUDAFStdSampDecimalComplete.class));
+ add(new AggregateDefinition("stddev_samp", ArgumentType.TIMESTAMP, Mode.COMPLETE, VectorUDAFStdSampTimestampComplete.class));
+
+ // (PARTIAL2 FLOAT_FAMILY covers INT_FAMILY, DECIMAL, and TIMESTAMP)
+ add(new AggregateDefinition("variance", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+ add(new AggregateDefinition("var_pop", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+ add(new AggregateDefinition("var_samp", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+ add(new AggregateDefinition("std", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+ add(new AggregateDefinition("stddev", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+ add(new AggregateDefinition("stddev_pop", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+ add(new AggregateDefinition("stddev_samp", ArgumentType.FLOAT_FAMILY, Mode.PARTIAL2, VectorUDAFVarPartial2.class));
+
+ add(new AggregateDefinition("variance", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFVarPopFinal.class));
+ add(new AggregateDefinition("var_pop", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFVarPopFinal.class));
+ add(new AggregateDefinition("var_samp", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFVarSampFinal.class));
+ add(new AggregateDefinition("std", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFStdPopFinal.class));
+ add(new AggregateDefinition("stddev", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFStdPopFinal.class));
+ add(new AggregateDefinition("stddev_pop", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFStdPopFinal.class));
+ add(new AggregateDefinition("stddev_samp", ArgumentType.FLOAT_FAMILY, Mode.FINAL, VectorUDAFStdSampFinal.class));
+
 // UDAFBloomFilter. Original data is one type, partial/final is another,
 // so this requires 2 aggregation classes (partial1/complete), (partial2/final)
 add(new AggregateDefinition("bloom_filter", ArgumentType.ALL_FAMILY, Mode.PARTIAL1, VectorUDAFBloomFilter.class));
@@ -2885,16 +2992,42 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc)
 String aggregateName = desc.getGenericUDAFName();
 VectorExpressionDescriptor.ArgumentType inputType = VectorExpressionDescriptor.ArgumentType.NONE;
+ GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode();
 if (paramDescList.size() > 0) {
 ExprNodeDesc inputExpr = paramDescList.get(0);
- inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString());
- if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) {
- throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString());
+ TypeInfo inputTypeInfo = inputExpr.getTypeInfo();
+ if (inputTypeInfo.getCategory() == Category.STRUCT) {
+
+ // Must be AVG or one of the variance aggregations doing PARTIAL2 or FINAL.
+ // E.g.
AVG PARTIAL2 and FINAL accept struct + if (udafEvaluatorMode != GenericUDAFEvaluator.Mode.PARTIAL2 && + udafEvaluatorMode != GenericUDAFEvaluator.Mode.FINAL) { + throw new HiveException("Input expression Hive type name " + inputExpr.getTypeString() + " and group by mode is " + udafEvaluatorMode.name() + + " -- expected PARTIAL2 or FINAL"); + } + GenericUDAFEvaluator evaluator = desc.getGenericUDAFEvaluator(); + + // UNDONE: What about AVG FINAL TIMESTAMP? + if (evaluator instanceof GenericUDAFAverage.GenericUDAFAverageEvaluatorDouble || + evaluator instanceof GenericUDAFVariance.GenericUDAFVarianceEvaluator) { + inputType = VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY; + } else if (evaluator instanceof GenericUDAFAverage.GenericUDAFAverageEvaluatorDecimal) { + inputType = VectorExpressionDescriptor.ArgumentType.DECIMAL; + } else { + // Nothing else supported yet... + throw new HiveException("Evaluator " + evaluator.getClass().getName() + " not supported"); + } + } else { + String inputExprTypeString = inputTypeInfo.getTypeName(); + + inputType = VectorExpressionDescriptor.ArgumentType.fromHiveTypeName(inputExpr.getTypeString()); + if (inputType == VectorExpressionDescriptor.ArgumentType.NONE) { + throw new HiveException("No vector argument type for Hive type name " + inputExpr.getTypeString()); + } } } - GenericUDAFEvaluator.Mode udafEvaluatorMode = desc.getMode(); for (AggregateDefinition aggDef : aggregatesDefinition) { if (aggregateName.equalsIgnoreCase(aggDef.getName()) && ((aggDef.getType() == VectorExpressionDescriptor.ArgumentType.NONE && @@ -2911,14 +3044,14 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) try { Constructor ctor = - aggClass.getConstructor(VectorExpression.class); + aggClass.getConstructor(VectorExpression.class, GenericUDAFEvaluator.Mode.class); VectorAggregateExpression aggExpr = ctor.newInstance( - vectorParams.length > 0 ? vectorParams[0] : null); + vectorParams.length > 0 ? 
vectorParams[0] : null, udafEvaluatorMode); aggExpr.init(desc); return aggExpr; } catch (Exception e) { throw new HiveException("Internal exception for vector aggregate : \"" + - aggregateName + "\" for type: \"" + inputType + "", e); + aggregateName + "\" for type: \"" + inputType + "\": " + getStackTraceAsSingleLine(e)); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index c20bc68..1fb70f8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -38,7 +38,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableUnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector; @@ -565,6 +568,50 @@ public static VectorExpressionWriter genVectorExpressionWritable(ExprNodeDesc no } /** + * Specialized writer for ListColumnVector. Will throw cast exception + * if the wrong vector column is used. + */ + private static abstract class VectorExpressionWriterList extends VectorExpressionWriterBase { + + // For now, we just use this to hold the object inspector. There are no writeValue, + // setValue, etc methods yet... + + } + + /** + * Specialized writer for MapColumnVector. Will throw cast exception + * if the wrong vector column is used. + */ + private static abstract class VectorExpressionWriterMap extends VectorExpressionWriterBase { + + // For now, we just use this to hold the object inspector. There are no writeValue, + // setValue, etc methods yet... + + } + + /** + * Specialized writer for StructColumnVector. Will throw cast exception + * if the wrong vector column is used. + */ + private static abstract class VectorExpressionWriterStruct extends VectorExpressionWriterBase { + + // For now, we just use this to hold the object inspector. There are no writeValue, + // setValue, etc methods yet... + + } + + /** + * Specialized writer for UnionColumnVector. Will throw cast exception + * if the wrong vector column is used. + */ + private static abstract class VectorExpressionWriterUnion extends VectorExpressionWriterBase { + + // For now, we just use this to hold the object inspector. There are no writeValue, + // setValue, etc methods yet... 
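// (Editor's sketch of where these placeholder writers are headed; assumed API,
// not part of this patch. A list writer, for instance, would walk the row's
// slice of ListColumnVector and fill the settable list through its object
// inspector -- listOI and elementWriter below are hypothetical:
//
//   ListColumnVector lcv = (ListColumnVector) column;
//   int start = (int) lcv.offsets[row];
//   int length = (int) lcv.lengths[row];
//   listOI.resize(obj, length);
//   for (int i = 0; i < length; i++) {
//     listOI.set(obj, i, elementWriter.writeValue(lcv.child, start + i));
//   }
//
// The map, struct, and union variants would do the analogous walk over their
// ColumnVector counterparts.)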
+
+ }
+
+ /**
 * Compiles the appropriate vector expression writer based on an expression info (ExprNodeDesc)
 */
 public static VectorExpressionWriter genVectorExpressionWritable(
@@ -629,11 +676,22 @@ public static VectorExpressionWriter genVectorExpressionWritable(
 ((PrimitiveObjectInspector) fieldObjInspector).getPrimitiveCategory());
 }
+ case LIST:
+ return genVectorExpressionWritableList(
+ (SettableListObjectInspector) fieldObjInspector);
+
+ case MAP:
+ return genVectorExpressionWritableMap(
+ (SettableMapObjectInspector) fieldObjInspector);
+
 case STRUCT:
+ return genVectorExpressionWritableStruct(
+ (SettableStructObjectInspector) fieldObjInspector);
+
 case UNION:
- case MAP:
- case LIST:
- return genVectorExpressionWritableEmpty();
+ return genVectorExpressionWritableUnion(
+ (SettableUnionObjectInspector) fieldObjInspector);
+
 default:
 throw new IllegalArgumentException("Unknown type " +
 fieldObjInspector.getCategory());
@@ -1339,6 +1397,130 @@ public Object initValue(Object ignored) {
 }.init(fieldObjInspector);
 }

+ private static VectorExpressionWriter genVectorExpressionWritableList(
+ SettableListObjectInspector fieldObjInspector) throws HiveException {
+
+ return new VectorExpressionWriterList() {
+ private Object obj;
+
+ public VectorExpressionWriter init(SettableListObjectInspector objInspector) throws HiveException {
+ super.init(objInspector);
+ obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((SettableListObjectInspector) this.objectInspector).create(0);
+ }
+
+ @Override
+ public Object writeValue(ColumnVector column, int row)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+
+ @Override
+ public Object setValue(Object row, ColumnVector column, int columnRow)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+ }.init(fieldObjInspector);
+ }
+
+ private static VectorExpressionWriter genVectorExpressionWritableMap(
+ SettableMapObjectInspector fieldObjInspector) throws HiveException {
+
+ return new VectorExpressionWriterMap() {
+ private Object obj;
+
+ public VectorExpressionWriter init(SettableMapObjectInspector objInspector) throws HiveException {
+ super.init(objInspector);
+ obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((SettableMapObjectInspector) this.objectInspector).create();
+ }
+
+ @Override
+ public Object writeValue(ColumnVector column, int row)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+
+ @Override
+ public Object setValue(Object row, ColumnVector column, int columnRow)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+ }.init(fieldObjInspector);
+ }
+
+ private static VectorExpressionWriter genVectorExpressionWritableStruct(
+ SettableStructObjectInspector fieldObjInspector) throws HiveException {
+
+ return new VectorExpressionWriterStruct() {
+ private Object obj;
+
+ public VectorExpressionWriter init(SettableStructObjectInspector objInspector) throws HiveException {
+ super.init(objInspector);
+ obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((SettableStructObjectInspector) this.objectInspector).create();
+ }
+
+ @Override
+ public Object writeValue(ColumnVector column, int row)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+
+ @Override
+ public Object setValue(Object row, ColumnVector column, int columnRow)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+ }.init(fieldObjInspector);
+ }
+
+ private static VectorExpressionWriter genVectorExpressionWritableUnion(
+ SettableUnionObjectInspector fieldObjInspector) throws HiveException {
+
+ return new VectorExpressionWriterUnion() {
+ private Object obj;
+
+ public VectorExpressionWriter init(SettableUnionObjectInspector objInspector) throws HiveException {
+ super.init(objInspector);
+ obj = initValue(null);
+ return this;
+ }
+
+ @Override
+ public Object initValue(Object ignored) {
+ return ((SettableUnionObjectInspector) this.objectInspector).create();
+ }
+
+ @Override
+ public Object writeValue(ColumnVector column, int row)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+
+ @Override
+ public Object setValue(Object row, ColumnVector column, int columnRow)
+ throws HiveException {
+ throw new HiveException("Not implemented yet");
+ }
+ }.init(fieldObjInspector);
+ }
+
 // For complex types like STRUCT, MAP, etc we do not support, we need a writer that
 // does nothing. We assume the Vectorizer class has not validated the query to actually
 // try and use the complex types. They do show up in inputObjInspector[0] and need to be
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java
index 7ab4473..702c3d5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorAggregateExpression.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 /**
@@ -34,6 +35,19 @@
 private static final long serialVersionUID = 1L;
+ protected final VectorExpression inputExpression;
+ protected final GenericUDAFEvaluator.Mode mode;
+
+ public VectorAggregateExpression(VectorExpression inputExpression,
+ GenericUDAFEvaluator.Mode mode) {
+ this.inputExpression = inputExpression;
+ this.mode = mode;
+ }
+
+ public VectorExpression getInputExpression() {
+ return inputExpression;
+ }
+
 /**
 * Buffer interface to store aggregates.
*/ @@ -56,7 +70,6 @@ public abstract void aggregateInputSelection(VectorAggregationBufferRow[] aggreg public boolean hasVariableSize() { return false; } - public abstract VectorExpression inputExpression(); public abstract void init(AggregationDesc desc) throws HiveException; @@ -64,7 +77,7 @@ public boolean hasVariableSize() { public String toString() { StringBuilder sb = new StringBuilder(); sb.append(this.getClass().getSimpleName()); - VectorExpression inputExpression = inputExpression(); + VectorExpression inputExpression = getInputExpression(); if (inputExpression != null) { sb.append("("); sb.append(inputExpression.toString()); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java deleted file mode 100644 index 4aac9d3..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFAvgDecimal.java +++ /dev/null @@ -1,521 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.vector.expressions.DecimalUtil; -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; -import org.apache.hadoop.hive.ql.exec.vector.VectorAggregationBufferRow; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.AggregationDesc; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; -import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; - -/** - * Generated from template VectorUDAFAvg.txt. 
- */ -@Description(name = "avg", - value = "_FUNC_(AVG) - Returns the average value of expr (vectorized, type: decimal)") -public class VectorUDAFAvgDecimal extends VectorAggregateExpression { - - private static final long serialVersionUID = 1L; - - /** class for storing the current aggregate value. */ - static class Aggregation implements AggregationBuffer { - - private static final long serialVersionUID = 1L; - - transient private final HiveDecimalWritable sum = new HiveDecimalWritable(); - transient private long count; - transient private boolean isNull; - - public void sumValueWithNullCheck(HiveDecimalWritable writable) { - if (isNull) { - // Make a copy since we intend to mutate sum. - sum.set(writable); - count = 1; - isNull = false; - } else { - // Note that if sum is out of range, mutateAdd will ignore the call. - // At the end, sum.isSet() can be checked for null. - sum.mutateAdd(writable); - count++; - } - } - - public void sumValueNoNullCheck(HiveDecimalWritable writable) { - sum.mutateAdd(writable); - count++; - } - - @Override - public int getVariableSize() { - throw new UnsupportedOperationException(); - } - - @Override - public void reset() { - isNull = true; - sum.setFromLong(0L); - count = 0; - } - } - - private VectorExpression inputExpression; - - @Override - public VectorExpression inputExpression() { - return inputExpression; - } - - transient private Object[] partialResult; - transient private LongWritable resultCount; - transient private HiveDecimalWritable resultSum; - transient private StructObjectInspector soi; - - /** - * The scale of the SUM in the partial output - */ - private short sumScale; - - /** - * The precision of the SUM in the partial output - */ - private short sumPrecision; - - /** - * the scale of the input expression - */ - private short inputScale; - - /** - * the precision of the input expression - */ - private short inputPrecision; - - public VectorUDAFAvgDecimal(VectorExpression inputExpression) { - this(); - this.inputExpression = inputExpression; - } - - public VectorUDAFAvgDecimal() { - super(); - partialResult = new Object[2]; - resultCount = new LongWritable(); - resultSum = new HiveDecimalWritable(); - partialResult[0] = resultCount; - partialResult[1] = resultSum; - - } - - private void initPartialResultInspector() { - // the output type of the vectorized partial aggregate must match the - // expected type for the row-mode aggregation - // For decimal, the type is "same number of integer digits and 4 more decimal digits" - - DecimalTypeInfo dtiSum = GenericUDAFAverage.deriveSumFieldTypeInfo(inputPrecision, inputScale); - this.sumScale = (short) dtiSum.scale(); - this.sumPrecision = (short) dtiSum.precision(); - - List foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(dtiSum)); - List fname = new ArrayList(); - fname.add("count"); - fname.add("sum"); - soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); - return myagg; - } - - @Override - public void aggregateInputSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - VectorizedRowBatch batch) throws HiveException { - - int batchSize = 
batch.size; - - if (batchSize == 0) { - return; - } - - inputExpression.evaluate(batch); - - DecimalColumnVector inputVector = ( DecimalColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - HiveDecimalWritable[] vector = inputVector.vector; - - if (inputVector.noNulls) { - if (inputVector.isRepeating) { - iterateNoNullsRepeatingWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector[0], batchSize); - } else { - if (batch.selectedInUse) { - iterateNoNullsSelectionWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector, batch.selected, batchSize); - } else { - iterateNoNullsWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector, batchSize); - } - } - } else { - if (inputVector.isRepeating) { - if (batch.selectedInUse) { - iterateHasNullsRepeatingSelectionWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector[0], batchSize, batch.selected, inputVector.isNull); - } else { - iterateHasNullsRepeatingWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector[0], batchSize, inputVector.isNull); - } - } else { - if (batch.selectedInUse) { - iterateHasNullsSelectionWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector, batchSize, batch.selected, inputVector.isNull); - } else { - iterateHasNullsWithAggregationSelection( - aggregationBufferSets, bufferIndex, - vector, batchSize, inputVector.isNull); - } - } - } - } - - private void iterateNoNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable value, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValueWithNullCheck(value); - } - } - - private void iterateNoNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable[] values, - int[] selection, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValueWithNullCheck(values[selection[i]]); - } - } - - private void iterateNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable[] values, - int batchSize) { - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValueWithNullCheck(values[i]); - } - } - - private void iterateHasNullsRepeatingSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable value, - int batchSize, - int[] selection, - boolean[] isNull) { - - if (isNull[0]) { - return; - } - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValueWithNullCheck(value); - } - - } - - private void iterateHasNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable value, - int batchSize, - boolean[] isNull) { - - if (isNull[0]) { - return; - } - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValueWithNullCheck(value); - } - } - - private void iterateHasNullsSelectionWithAggregationSelection( - 
VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable[] values, - int batchSize, - int[] selection, - boolean[] isNull) { - - for (int j=0; j < batchSize; ++j) { - int i = selection[j]; - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - j); - myagg.sumValueWithNullCheck(values[i]); - } - } - } - - private void iterateHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - HiveDecimalWritable[] values, - int batchSize, - boolean[] isNull) { - - for (int i=0; i < batchSize; ++i) { - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValueWithNullCheck(values[i]); - } - } - } - - - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - - inputExpression.evaluate(batch); - - DecimalColumnVector inputVector = - (DecimalColumnVector)batch.cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - Aggregation myagg = (Aggregation)agg; - - HiveDecimalWritable[] vector = inputVector.vector; - - if (inputVector.isRepeating) { - if (inputVector.noNulls) { - if (myagg.isNull) { - myagg.isNull = false; - myagg.sum.setFromLong(0L); - myagg.count = 0; - } - HiveDecimal value = vector[0].getHiveDecimal(); - HiveDecimal multiple = value.multiply(HiveDecimal.create(batchSize)); - myagg.sum.mutateAdd(multiple); - myagg.count += batchSize; - } - return; - } - - if (!batch.selectedInUse && inputVector.noNulls) { - iterateNoSelectionNoNulls(myagg, vector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull); - } - else if (inputVector.noNulls){ - iterateSelectionNoNulls(myagg, vector, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, vector, batchSize, inputVector.isNull, batch.selected); - } - } - - private void iterateSelectionHasNulls( - Aggregation myagg, - HiveDecimalWritable[] vector, - int batchSize, - boolean[] isNull, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - if (!isNull[i]) { - myagg.sumValueWithNullCheck(vector[i]); - } - } - } - - private void iterateSelectionNoNulls( - Aggregation myagg, - HiveDecimalWritable[] vector, - int batchSize, - int[] selected) { - - if (myagg.isNull) { - myagg.isNull = false; - myagg.sum.setFromLong(0L); - myagg.count = 0; - } - - for (int i=0; i< batchSize; ++i) { - myagg.sumValueNoNullCheck(vector[selected[i]]); - } - } - - private void iterateNoSelectionHasNulls( - Aggregation myagg, - HiveDecimalWritable[] vector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - List fname = new ArrayList(); - fname.add("count"); - fname.add("sum"); - soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(bufferIndex); - return myagg; - } - - @Override - public void aggregateInputSelection( - VectorAggregationBufferRow[] 
aggregationBufferSets, - int bufferIndex, - VectorizedRowBatch batch) throws HiveException { - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = (TimestampColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - iterateNoNullsRepeatingWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector.getDouble(0), - batchSize); - } else { - if (batch.selectedInUse) { - iterateNoNullsSelectionWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector, batch.selected, batchSize); - } else { - iterateNoNullsWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector, batchSize); - } - } - } else { - if (inputColVector.isRepeating) { - if (batch.selectedInUse) { - iterateHasNullsRepeatingSelectionWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector.getDouble(0), batchSize, batch.selected, inputColVector.isNull); - } else { - iterateHasNullsRepeatingWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector.getDouble(0), batchSize, inputColVector.isNull); - } - } else { - if (batch.selectedInUse) { - iterateHasNullsSelectionWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector, batchSize, batch.selected, inputColVector.isNull); - } else { - iterateHasNullsWithAggregationSelection( - aggregationBufferSets, bufferIndex, - inputColVector, batchSize, inputColVector.isNull); - } - } - } - } - - private void iterateNoNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - double value, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValue(value); - } - } - - private void iterateNoNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - TimestampColumnVector inputColVector, - int[] selection, - int batchSize) { - - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValue( - inputColVector.getDouble(selection[i])); - } - } - - private void iterateNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - TimestampColumnVector inputColVector, - int batchSize) { - for (int i=0; i < batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValue(inputColVector.getDouble(i)); - } - } - - private void iterateHasNullsRepeatingSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - double value, - int batchSize, - int[] selection, - boolean[] isNull) { - - for (int i=0; i < batchSize; ++i) { - if (!isNull[selection[i]]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValue(value); - } - } - - } - - private void iterateHasNullsRepeatingWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - double value, - int batchSize, - boolean[] isNull) { - - for (int i=0; i < batchSize; ++i) { - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - 
myagg.sumValue(value); - } - } - } - - private void iterateHasNullsSelectionWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - TimestampColumnVector inputColVector, - int batchSize, - int[] selection, - boolean[] isNull) { - - for (int j=0; j < batchSize; ++j) { - int i = selection[j]; - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - j); - myagg.sumValue(inputColVector.getDouble(i)); - } - } - } - - private void iterateHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int bufferIndex, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull) { - - for (int i=0; i < batchSize; ++i) { - if (!isNull[i]) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - bufferIndex, - i); - myagg.sumValue(inputColVector.getDouble(i)); - } - } - } - - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = - (TimestampColumnVector)batch.cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - Aggregation myagg = (Aggregation)agg; - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { - if (myagg.isNull) { - myagg.isNull = false; - myagg.sum = 0; - myagg.count = 0; - } - myagg.sum += inputColVector.getDouble(0)*batchSize; - myagg.count += batchSize; - } - return; - } - - if (!batch.selectedInUse && inputColVector.noNulls) { - iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); - } - else if (inputColVector.noNulls){ - iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); - } - } - - private void iterateSelectionHasNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - int i = selected[j]; - if (!isNull[i]) { - double value = inputColVector.getDouble(i); - if (myagg.isNull) { - myagg.isNull = false; - myagg.sum = 0; - myagg.count = 0; - } - myagg.sum += value; - myagg.count += 1; - } - } - } - - private void iterateSelectionNoNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - int[] selected) { - - if (myagg.isNull) { - myagg.isNull = false; - myagg.sum = 0; - myagg.count = 0; - } - - for (int i=0; i< batchSize; ++i) { - double value = inputColVector.getDouble(selected[i]); - myagg.sum += value; - myagg.count += 1; - } - } - - private void iterateNoSelectionHasNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - - List fname = new ArrayList(); - fname.add("count"); - fname.add("sum"); - fname.add("variance"); - - soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] 
aggregationBufferSets, - int aggregateIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); - return myagg; - } - - - @Override - public void aggregateInputSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - VectorizedRowBatch batch) throws HiveException { - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = (TimestampColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls || !inputColVector.isNull[0]) { - iterateRepeatingNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector.getDouble(0), batchSize); - } - } - else if (!batch.selectedInUse && inputColVector.noNulls) { - iterateNoSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); - } - else if (inputColVector.noNulls){ - iterateSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); - } - else { - iterateSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, - inputColVector.isNull, batch.selected); - } - - } - - private void iterateRepeatingNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - double value, - int batchSize) { - - for (int i=0; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - private void iterateSelectionHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregateIndex, - j); - int i = selected[j]; - if (!isNull[i]) { - double value = inputColVector.getDouble(i); - if (myagg.isNull) { - myagg.init (); - } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateSelectionNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - int[] selected) { - - for (int i=0; i< batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregateIndex, - i); - double value = inputColVector.getDouble(selected[i]); - if (myagg.isNull) { - myagg.init (); - } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - private void iterateNoSelectionHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i 1) { - double t = 
myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateNoSelectionNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize) { - - for (int i=0; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = (TimestampColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - Aggregation myagg = (Aggregation)agg; - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { - iterateRepeatingNoNulls(myagg, inputColVector.getDouble(0), batchSize); - } - } - else if (!batch.selectedInUse && inputColVector.noNulls) { - iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); - } - else if (inputColVector.noNulls){ - iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); - } - } - - private void iterateRepeatingNoNulls( - Aggregation myagg, - double value, - int batchSize) { - - if (myagg.isNull) { - myagg.init (); - } - - // TODO: conjure a formula w/o iterating - // - - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - - // We pulled out i=0 so we can remove the count > 1 check in the loop - for (int i=1; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateSelectionNoNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - int[] selected) { - - if (myagg.isNull) { - myagg.init (); - } - - double value = inputColVector.getDouble(selected[0]); - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - - // i=0 was pulled out to remove the count > 1 check in the loop - // - for (int i=1; i< batchSize; ++i) { - value = inputColVector.getDouble(selected[i]); - myagg.sum += value; - myagg.count += 1; - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - - private void iterateNoSelectionHasNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateNoSelectionNoNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize) { - - if (myagg.isNull) { - myagg.init (); - } - - double value = inputColVector.getDouble(0); - myagg.sum += value; - myagg.count += 1; - - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - - // i=0 was 
pulled out to remove count > 1 check - for (int i=1; i foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - - List fname = new ArrayList(); - fname.add("count"); - fname.add("sum"); - fname.add("variance"); - - soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); - return myagg; - } - - - @Override - public void aggregateInputSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - VectorizedRowBatch batch) throws HiveException { - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = (TimestampColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls || !inputColVector.isNull[0]) { - iterateRepeatingNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector.getDouble(0), batchSize); - } - } - else if (!batch.selectedInUse && inputColVector.noNulls) { - iterateNoSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); - } - else if (inputColVector.noNulls){ - iterateSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); - } - else { - iterateSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, - inputColVector.isNull, batch.selected); - } - - } - - private void iterateRepeatingNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - double value, - int batchSize) { - - for (int i=0; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - private void iterateSelectionHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregateIndex, - j); - int i = selected[j]; - if (!isNull[i]) { - double value = inputColVector.getDouble(i); - if (myagg.isNull) { - myagg.init (); - } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateSelectionNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - int[] selected) { - - for (int i=0; i< batchSize; ++i) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregateIndex, - 
i); - double value = inputColVector.getDouble(selected[i]); - if (myagg.isNull) { - myagg.init (); - } - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - private void iterateNoSelectionHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateNoSelectionNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize) { - - for (int i=0; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - @Override - public void aggregateInput(AggregationBuffer agg, VectorizedRowBatch batch) - throws HiveException { - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = (TimestampColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - Aggregation myagg = (Aggregation)agg; - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls) { - iterateRepeatingNoNulls(myagg, inputColVector.getDouble(0), batchSize); - } - } - else if (!batch.selectedInUse && inputColVector.noNulls) { - iterateNoSelectionNoNulls(myagg, inputColVector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull); - } - else if (inputColVector.noNulls){ - iterateSelectionNoNulls(myagg, inputColVector, batchSize, batch.selected); - } - else { - iterateSelectionHasNulls(myagg, inputColVector, batchSize, inputColVector.isNull, batch.selected); - } - } - - private void iterateRepeatingNoNulls( - Aggregation myagg, - double value, - int batchSize) { - - if (myagg.isNull) { - myagg.init (); - } - - // TODO: conjure a formula w/o iterating - // - - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - - // We pulled out i=0 so we can remove the count > 1 check in the loop - for (int i=1; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateSelectionNoNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - int[] selected) { - - if (myagg.isNull) { - myagg.init (); - } - - double value = inputColVector.getDouble(selected[0]); - myagg.sum += value; - myagg.count += 1; - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - - // i=0 was pulled out to remove the count > 1 check in the loop - // - for (int i=1; i< batchSize; ++i) { - value = inputColVector.getDouble(selected[i]); - myagg.sum += value; - myagg.count += 1; - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - - private void iterateNoSelectionHasNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull) { - - for(int i=0;i 
1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - } - - private void iterateNoSelectionNoNulls( - Aggregation myagg, - TimestampColumnVector inputColVector, - int batchSize) { - - if (myagg.isNull) { - myagg.init (); - } - - double value = inputColVector.getDouble(0); - myagg.sum += value; - myagg.count += 1; - - if(myagg.count > 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - - // i=0 was pulled out to remove count > 1 check - for (int i=1; i foi = new ArrayList(); - foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); - - List fname = new ArrayList(); - fname.add("count"); - fname.add("sum"); - fname.add("variance"); - - soi = ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi); - } - - private Aggregation getCurrentAggregationBuffer( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - int row) { - VectorAggregationBufferRow mySet = aggregationBufferSets[row]; - Aggregation myagg = (Aggregation) mySet.getAggregationBuffer(aggregateIndex); - return myagg; - } - - - @Override - public void aggregateInputSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - VectorizedRowBatch batch) throws HiveException { - - inputExpression.evaluate(batch); - - TimestampColumnVector inputColVector = (TimestampColumnVector)batch. - cols[this.inputExpression.getOutputColumn()]; - - int batchSize = batch.size; - - if (batchSize == 0) { - return; - } - - if (inputColVector.isRepeating) { - if (inputColVector.noNulls || !inputColVector.isNull[0]) { - iterateRepeatingNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector.getDouble(0), batchSize); - } - } - else if (!batch.selectedInUse && inputColVector.noNulls) { - iterateNoSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize); - } - else if (!batch.selectedInUse) { - iterateNoSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, inputColVector.isNull); - } - else if (inputColVector.noNulls){ - iterateSelectionNoNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, batch.selected); - } - else { - iterateSelectionHasNullsWithAggregationSelection( - aggregationBufferSets, aggregateIndex, inputColVector, batchSize, - inputColVector.isNull, batch.selected); - } - - } - - private void iterateRepeatingNoNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - double value, - int batchSize) { - - for (int i=0; i 1) { - double t = myagg.count*value - myagg.sum; - myagg.variance += (t*t) / ((double)myagg.count*(myagg.count-1)); - } - } - } - - private void iterateSelectionHasNullsWithAggregationSelection( - VectorAggregationBufferRow[] aggregationBufferSets, - int aggregateIndex, - TimestampColumnVector inputColVector, - int batchSize, - boolean[] isNull, - int[] selected) { - - for (int j=0; j< batchSize; ++j) { - Aggregation myagg = getCurrentAggregationBuffer( - aggregationBufferSets, - aggregateIndex, - j); - int i = selected[j]; - if (!isNull[i]) { - double value = inputColVector.getDouble(i); - if (myagg.isNull) { - myagg.init (); - } - 
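// (Editor's note on the update below, which recurs throughout these deleted
// template files: after sum and count are incremented, t = count*value - sum and
// variance += t*t / (count*(count-1)) maintain the running sum of squared
// deviations S_n = S_{n-1} + (n*x_n - sum_n)^2 / (n*(n-1)), from which
// var_pop = S_n/n and var_samp = S_n/(n-1). E.g. the values 1, 2, 3 give
// S = 0 + 0.5 + 1.5 = 2.0, so var_pop = 2/3.)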
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ ... @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       try {
         ret = validateMapWorkOperator(op, mapWork,
 isTezOrSpark);
       } catch (Exception e) {
+        String oneLineStackTrace = VectorizationContext.getStackTraceAsSingleLine(e);
+        LOG.info(oneLineStackTrace);
         throw new SemanticException(e);
       }
       if (!ret) {
@@ -1699,6 +1703,13 @@ public PhysicalContext resolve(PhysicalContext physicalContext) throws SemanticE
         HiveConf.getBoolVar(hiveConf,
             HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED);
 
+    isVectorizationComplexTypesEnabled =
+        HiveConf.getBoolVar(hiveConf,
+            HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED);
+    isVectorizationGroupByComplexTypesEnabled =
+        HiveConf.getBoolVar(hiveConf,
+            HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED);
+
     isSchemaEvolution =
         HiveConf.getBoolVar(hiveConf,
             HiveConf.ConfVars.HIVE_SCHEMA_EVOLUTION);
@@ -1872,7 +1883,8 @@ private boolean validateMapJoinOperator(MapJoinOperator op) {
   private boolean validateMapJoinDesc(MapJoinDesc desc) {
     byte posBigTable = (byte) desc.getPosBigTable();
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(posBigTable);
-    if (!validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER)) {
+    if (!validateExprNodeDesc(
+        filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) {
       return false;
     }
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(posBigTable);
@@ -1903,7 +1915,8 @@ private boolean validateSparkHashTableSinkOperator(SparkHashTableSinkOperator op
     List<ExprNodeDesc> filterExprs = desc.getFilters().get(tag);
     List<ExprNodeDesc> keyExprs = desc.getKeys().get(tag);
     List<ExprNodeDesc> valueExprs = desc.getExprs().get(tag);
-    return validateExprNodeDesc(filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER) &&
+    return validateExprNodeDesc(
+        filterExprs, "Filter", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true) &&
         validateExprNodeDesc(keyExprs, "Key") &&
         validateExprNodeDesc(valueExprs, "Value");
   }
@@ -1928,7 +1941,8 @@ private boolean validateSelectOperator(SelectOperator op) {
 
   private boolean validateFilterOperator(FilterOperator op) {
     ExprNodeDesc desc = op.getConf().getPredicate();
-    return validateExprNodeDesc(desc, "Predicate", VectorExpressionDescriptor.Mode.FILTER);
+    return validateExprNodeDesc(
+        desc, "Predicate", VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true);
   }
 
   private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, boolean isTezOrSpark) {
@@ -1938,7 +1952,7 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
       setOperatorIssue("DISTINCT not supported");
       return false;
     }
-    boolean ret = validateExprNodeDesc(desc.getKeys(), "Key");
+    boolean ret = validateExprNodeDescNoComplex(desc.getKeys(), "Key");
     if (!ret) {
       return false;
     }
@@ -2045,12 +2059,12 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
         VectorGroupByDesc.groupByDescModeToVectorProcessingMode(desc.getMode(), hasKeys);
     if (desc.isGroupingSetsPresent() &&
         (processingMode != ProcessingMode.HASH && processingMode != ProcessingMode.STREAMING)) {
-      LOG.info("Vectorized GROUPING SETS only expected for HASH and STREAMING processing modes");
+      setOperatorIssue("Vectorized GROUPING SETS only expected for HASH and STREAMING processing modes");
       return false;
     }
 
     Pair<Boolean, Boolean> retPair =
-        validateAggregationDescs(desc.getAggregators(), processingMode, hasKeys);
+        validateAggregationDescs(desc.getAggregators(), desc.getMode(), hasKeys);
     if (!retPair.left) {
       return false;
     }
@@ -2064,6 +2078,9 @@ private boolean validateGroupByOperator(GroupByOperator op, boolean isReduce, bo
 
     vectorDesc.setProcessingMode(processingMode);
 
+
vectorDesc.setIsVectorizationComplexTypesEnabled(isVectorizationComplexTypesEnabled); + vectorDesc.setIsVectorizationGroupByComplexTypesEnabled(isVectorizationGroupByComplexTypesEnabled); + LOG.info("Vector GROUP BY operator will use processing mode " + processingMode.name() + ", isVectorOutput " + vectorDesc.isVectorOutput()); @@ -2075,14 +2092,21 @@ private boolean validateFileSinkOperator(FileSinkOperator op) { } private boolean validateExprNodeDesc(List descs, String expressionTitle) { - return validateExprNodeDesc(descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); + return validateExprNodeDesc( + descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ true); + } + + private boolean validateExprNodeDescNoComplex(List descs, String expressionTitle) { + return validateExprNodeDesc( + descs, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ false); } private boolean validateExprNodeDesc(List descs, String expressionTitle, - VectorExpressionDescriptor.Mode mode) { + VectorExpressionDescriptor.Mode mode, + boolean allowComplex) { for (ExprNodeDesc d : descs) { - boolean ret = validateExprNodeDesc(d, expressionTitle, mode); + boolean ret = validateExprNodeDesc(d, expressionTitle, mode, allowComplex); if (!ret) { return false; } @@ -2091,10 +2115,10 @@ private boolean validateExprNodeDesc(List descs, } private Pair validateAggregationDescs(List descs, - ProcessingMode processingMode, boolean hasKeys) { + GroupByDesc.Mode groupByMode, boolean hasKeys) { boolean outputIsPrimitive = true; for (AggregationDesc d : descs) { - Pair retPair = validateAggregationDesc(d, processingMode, hasKeys); + Pair retPair = validateAggregationDesc(d, groupByMode, hasKeys); if (!retPair.left) { return retPair; } @@ -2106,7 +2130,7 @@ private boolean validateExprNodeDesc(List descs, } private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressionTitle, - VectorExpressionDescriptor.Mode mode) { + VectorExpressionDescriptor.Mode mode, boolean allowComplex) { if (desc instanceof ExprNodeColumnDesc) { ExprNodeColumnDesc c = (ExprNodeColumnDesc) desc; // Currently, we do not support vectorized virtual columns (see HIVE-5570). @@ -2116,9 +2140,11 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi } } String typeName = desc.getTypeInfo().getTypeName(); - boolean ret = validateDataType(typeName, mode); + boolean ret = validateDataType(typeName, mode, allowComplex && isVectorizationComplexTypesEnabled); if (!ret) { - setExpressionIssue(expressionTitle, "Data type " + typeName + " of " + desc.toString() + " not supported"); + setExpressionIssue(expressionTitle, + getValidateDataTypeErrorMsg( + typeName, mode, allowComplex, isVectorizationComplexTypesEnabled)); return false; } boolean isInExpression = false; @@ -2144,7 +2170,8 @@ private boolean validateExprNodeDescRecursive(ExprNodeDesc desc, String expressi for (ExprNodeDesc d : desc.getChildren()) { // Don't restrict child expressions for projection. // Always use loose FILTER mode. 
- if (!validateExprNodeDescRecursive(d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER)) { + if (!validateExprNodeDescRecursive( + d, expressionTitle, VectorExpressionDescriptor.Mode.FILTER, /* allowComplex */ true)) { return false; } } @@ -2195,12 +2222,13 @@ private boolean validateStructInExpression(ExprNodeDesc desc, } private boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle) { - return validateExprNodeDesc(desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION); + return validateExprNodeDesc( + desc, expressionTitle, VectorExpressionDescriptor.Mode.PROJECTION, /* allowComplex */ true); } boolean validateExprNodeDesc(ExprNodeDesc desc, String expressionTitle, - VectorExpressionDescriptor.Mode mode) { - if (!validateExprNodeDescRecursive(desc, expressionTitle, mode)) { + VectorExpressionDescriptor.Mode mode, boolean allowComplex) { + if (!validateExprNodeDescRecursive(desc, expressionTitle, mode, allowComplex)) { return false; } try { @@ -2239,12 +2267,12 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { return true; } - public static ObjectInspector.Category aggregationOutputCategory(VectorAggregateExpression vectorAggrExpr) { + public static Category aggregationOutputCategory(VectorAggregateExpression vectorAggrExpr) { ObjectInspector outputObjInspector = vectorAggrExpr.getOutputObjectInspector(); return outputObjInspector.getCategory(); } - private Pair validateAggregationDesc(AggregationDesc aggDesc, ProcessingMode processingMode, + private Pair validateAggregationDesc(AggregationDesc aggDesc, GroupByDesc.Mode groupByMode, boolean hasKeys) { String udfName = aggDesc.getGenericUDAFName().toLowerCase(); @@ -2253,12 +2281,16 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { return new Pair(false, false); } /* + // The planner seems to pull this one out. if (aggDesc.getDistinct()) { setExpressionIssue("Aggregation Function", "DISTINCT not supported"); return new Pair(false, false); } */ - if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters(), "Aggregation Function UDF " + udfName + " parameter")) { + + ArrayList parameters = aggDesc.getParameters(); + + if (parameters != null && !validateExprNodeDesc(parameters, "Aggregation Function UDF " + udfName + " parameter")) { return new Pair(false, false); } @@ -2280,27 +2312,90 @@ private boolean validateGenericUdf(ExprNodeGenericFuncDesc genericUDFExpr) { " vector expression " + vectorAggrExpr.toString()); } - ObjectInspector.Category outputCategory = aggregationOutputCategory(vectorAggrExpr); - boolean outputIsPrimitive = (outputCategory == ObjectInspector.Category.PRIMITIVE); - if (processingMode == ProcessingMode.MERGE_PARTIAL && - hasKeys && - !outputIsPrimitive) { - setOperatorIssue("Vectorized Reduce MergePartial GROUP BY keys can only handle aggregate outputs that are primitive types"); - return new Pair(false, false); + boolean canVectorizeComplexType = + (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); + + boolean isVectorOutput; + if (canVectorizeComplexType) { + isVectorOutput = true; + } else { + + // Do complex input type checking... + boolean inputIsPrimitive; + if (parameters == null || parameters.size() == 0) { + inputIsPrimitive = true; // Pretend for COUNT(*) + } else { + + // Multi-input should have been eliminated earlier. 
+ // Preconditions.checkState(parameters.size() == 1); + + final Category inputCategory = parameters.get(0).getTypeInfo().getCategory(); + inputIsPrimitive = (inputCategory == Category.PRIMITIVE); + } + + if (!inputIsPrimitive) { + setOperatorIssue("Cannot vectorize GROUP BY with aggregation complex type inputs in " + + aggDesc.getExprString() + " since " + + GroupByDesc.getComplexTypeWithGroupByEnabledCondition( + isVectorizationComplexTypesEnabled, isVectorizationGroupByComplexTypesEnabled)); + return new Pair(false, false); + } + + // Now, look a the output. If the output is complex, we switch to row-mode for all child + // operators... + isVectorOutput = (aggregationOutputCategory(vectorAggrExpr) == Category.PRIMITIVE); } - return new Pair(true, outputIsPrimitive); + return new Pair(true, isVectorOutput); } - public static boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode) { + public static boolean validateDataType(String type, VectorExpressionDescriptor.Mode mode, + boolean allowComplex) { + type = type.toLowerCase(); boolean result = supportedDataTypesPattern.matcher(type).matches(); if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { return false; } + + if (!result) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); + if (typeInfo.getCategory() != Category.PRIMITIVE) { + if (allowComplex) { + return true; + } + } + } return result; } + public static String getValidateDataTypeErrorMsg(String type, VectorExpressionDescriptor.Mode mode, + boolean allowComplex, boolean isVectorizationComplexTypesEnabled) { + + type = type.toLowerCase(); + boolean result = supportedDataTypesPattern.matcher(type).matches(); + if (result && mode == VectorExpressionDescriptor.Mode.PROJECTION && type.equals("void")) { + return "Vectorizing data type void not supported when mode = PROJECTION"; + } + + if (!result) { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type); + if (typeInfo.getCategory() != Category.PRIMITIVE) { + if (allowComplex && isVectorizationComplexTypesEnabled) { + return null; + } else if (!allowComplex) { + return "Vectorizing complex type " + typeInfo.getCategory() + " not supported"; + } else { + return "Vectorizing complex type " + typeInfo.getCategory() + " not enabled (" + + type + ") since " + + GroupByDesc.getComplexTypeEnabledCondition( + isVectorizationComplexTypesEnabled); + } + } + } + return (result ? 
null : "Vectorizing data type " + type + " not supported"); + } + private VectorizationContext getVectorizationContext(String contextName, VectorTaskColumnInfo vectorTaskColumnInfo) { @@ -3482,7 +3577,7 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { } VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); for (VectorAggregateExpression vecAggr : vecAggregators) { - if (usesVectorUDFAdaptor(vecAggr.inputExpression())) { + if (usesVectorUDFAdaptor(vecAggr.getInputExpression())) { vectorTaskColumnInfo.setUsesVectorUDFAdaptor(true); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java index fe91ee7..45d100d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.VectorAggregateExpression; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; @@ -305,6 +306,21 @@ public void setDistinct(boolean isDistinct) { this.isDistinct = isDistinct; } + @Override + public Object clone() { + ArrayList outputColumnNames = new ArrayList<>(); + outputColumnNames.addAll(this.outputColumnNames); + ArrayList keys = new ArrayList<>(); + keys.addAll(this.keys); + ArrayList aggregators = new ArrayList<>(); + aggregators.addAll(this.aggregators); + List listGroupingSets = new ArrayList<>(); + listGroupingSets.addAll(this.listGroupingSets); + return new GroupByDesc(this.mode, outputColumnNames, keys, aggregators, + this.groupByMemoryUsage, this.memoryThreshold, listGroupingSets, this.groupingSetsPresent, + this.groupingSetPosition, this.isDistinct); + } + public class GroupByOperatorExplainVectorization extends OperatorExplainVectorization { private final GroupByDesc groupByDesc; @@ -337,20 +353,42 @@ public boolean getGroupByRowOutputCascade() { return vectorGroupByDesc.isVectorOutput(); } + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorProcessingMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getProcessingMode() { + return vectorGroupByDesc.getProcessingMode().name(); + } + + @Explain(vectorization = Vectorization.OPERATOR, displayName = "groupByMode", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getGroupByMode() { + return groupByDesc.getMode().name(); + } + @Explain(vectorization = Vectorization.OPERATOR, displayName = "vectorOutputConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getVectorOutputConditionsNotMet() { List results = new ArrayList(); + + boolean isVectorizationComplexTypesEnabled = vectorGroupByDesc.getIsVectorizationComplexTypesEnabled(); + boolean isVectorizationGroupByComplexTypesEnabled = vectorGroupByDesc.getIsVectorizationGroupByComplexTypesEnabled(); + + if (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled) { + return null; + } + VectorAggregateExpression[] vecAggregators = vectorGroupByDesc.getAggregators(); for (VectorAggregateExpression vecAggr : vecAggregators) { Category category = Vectorizer.aggregationOutputCategory(vecAggr); if (category != ObjectInspector.Category.PRIMITIVE) { results.add( - "Vector output of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE IS false"); + "Vector output 
of " + vecAggr.toString() + " output type " + category + " requires PRIMITIVE type IS false"); } } if (results.size() == 0) { return null; } + + results.add( + getComplexTypeWithGroupByEnabledCondition( + isVectorizationComplexTypesEnabled, isVectorizationGroupByComplexTypesEnabled)); return results; } @@ -368,18 +406,21 @@ public GroupByOperatorExplainVectorization getGroupByVectorization() { return new GroupByOperatorExplainVectorization(this, vectorDesc); } - @Override - public Object clone() { - ArrayList outputColumnNames = new ArrayList<>(); - outputColumnNames.addAll(this.outputColumnNames); - ArrayList keys = new ArrayList<>(); - keys.addAll(this.keys); - ArrayList aggregators = new ArrayList<>(); - aggregators.addAll(this.aggregators); - List listGroupingSets = new ArrayList<>(); - listGroupingSets.addAll(this.listGroupingSets); - return new GroupByDesc(this.mode, outputColumnNames, keys, aggregators, - this.groupByMemoryUsage, this.memoryThreshold, listGroupingSets, this.groupingSetsPresent, - this.groupingSetPosition, this.isDistinct); + public static String getComplexTypeEnabledCondition( + boolean isVectorizationComplexTypesEnabled) { + return + HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + + " IS " + isVectorizationComplexTypesEnabled; + } + + public static String getComplexTypeWithGroupByEnabledCondition( + boolean isVectorizationComplexTypesEnabled, + boolean isVectorizationGroupByComplexTypesEnabled) { + final boolean enabled = (isVectorizationComplexTypesEnabled && isVectorizationGroupByComplexTypesEnabled); + return "(" + + HiveConf.ConfVars.HIVE_VECTORIZATION_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationComplexTypesEnabled + + " AND " + + HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_COMPLEX_TYPES_ENABLED.varname + " " + isVectorizationGroupByComplexTypesEnabled + + ") IS " + enabled; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java index 2120400..8b99ae0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java @@ -299,27 +299,6 @@ public void deriveLlap(Configuration conf, boolean isExecDriver) { isLlapOn, canWrapAny, hasPathToPartInfo, hasLlap, hasNonLlap, hasAcid); } - private boolean checkVectorizerSupportedTypes(boolean hasLlap) { - for (Map.Entry> entry : aliasToWork.entrySet()) { - final String alias = entry.getKey(); - Operator op = entry.getValue(); - PartitionDesc partitionDesc = aliasToPartnInfo.get(alias); - if (op instanceof TableScanOperator && partitionDesc != null && - partitionDesc.getTableDesc() != null) { - final TableScanOperator tsOp = (TableScanOperator) op; - final List readColumnNames = tsOp.getNeededColumns(); - final Properties props = partitionDesc.getTableDesc().getProperties(); - final List typeInfos = TypeInfoUtils.getTypeInfosFromTypeString( - props.getProperty(serdeConstants.LIST_COLUMN_TYPES)); - final List allColumnTypes = TypeInfoUtils.getTypeStringsFromTypeInfo(typeInfos); - final List allColumnNames = Utilities.getColumnNames(props); - hasLlap = Utilities.checkVectorizerSupportedTypes(readColumnNames, allColumnNames, - allColumnTypes); - } - } - return hasLlap; - } - private static String deriveLlapIoDescString(boolean isLlapOn, boolean canWrapAny, boolean hasPathToPartInfo, boolean hasLlap, boolean hasNonLlap, boolean hasAcid) { if (!isLlapOn) return null; // LLAP IO is off, don't output. 
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java index f9a8725..89d868d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java @@ -65,6 +65,8 @@ private VectorExpression[] keyExpressions; private VectorAggregateExpression[] aggregators; private int[] projectedOutputColumns; + private boolean isVectorizationComplexTypesEnabled; + private boolean isVectorizationGroupByComplexTypesEnabled; public VectorGroupByDesc() { this.processingMode = ProcessingMode.NONE; @@ -110,6 +112,22 @@ public void setProjectedOutputColumns(int[] projectedOutputColumns) { return projectedOutputColumns; } + public void setIsVectorizationComplexTypesEnabled(boolean isVectorizationComplexTypesEnabled) { + this.isVectorizationComplexTypesEnabled = isVectorizationComplexTypesEnabled; + } + + public boolean getIsVectorizationComplexTypesEnabled() { + return isVectorizationComplexTypesEnabled; + } + + public void setIsVectorizationGroupByComplexTypesEnabled(boolean isVectorizationGroupByComplexTypesEnabled) { + this.isVectorizationGroupByComplexTypesEnabled = isVectorizationGroupByComplexTypesEnabled; + } + + public boolean getIsVectorizationGroupByComplexTypesEnabled() { + return isVectorizationGroupByComplexTypesEnabled; + } + /** * Which ProcessingMode for VectorGroupByOperator? * diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java index a28f7e8..2ea426c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java @@ -259,16 +259,18 @@ protected ObjectInspector getSumFieldWritableObjectInspector() { } private DecimalTypeInfo deriveResultDecimalTypeInfo() { - int prec = inputOI.precision(); - int scale = inputOI.scale(); + return deriveResultDecimalTypeInfo(inputOI.precision(), inputOI.scale(), mode); + } + + public static DecimalTypeInfo deriveResultDecimalTypeInfo(int precision, int scale, Mode mode) { if (mode == Mode.FINAL || mode == Mode.COMPLETE) { - int intPart = prec - scale; + int intPart = precision - scale; // The avg() result type has the same number of integer digits and 4 more decimal digits. scale = Math.min(scale + 4, HiveDecimal.MAX_SCALE - intPart); return TypeInfoFactory.getDecimalTypeInfo(intPart + scale, scale); } else { // For intermediate sum field - return GenericUDAFAverage.deriveSumFieldTypeInfo(prec, scale); + return GenericUDAFAverage.deriveSumFieldTypeInfo(precision, scale); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java index 6d3b92b..a041ffc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java @@ -211,15 +211,15 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc super.init(m, parameters); result = new HiveDecimalWritable(0); inputOI = (PrimitiveObjectInspector) parameters[0]; - // The output precision is 10 greater than the input which should cover at least - // 10b rows. The scale is the same as the input. 
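// Worked example for the surrounding decimal derivations (annotation only,
// not patch code): a decimal(10,2) input gives SUM in PARTIAL1/COMPLETE mode
//   precision = min(38, 10 + 10) = 20, scale = 2  =>  decimal(20,2),
// while AVG's FINAL/COMPLETE result keeps the 8 integer digits and widens
// the scale by up to 4: min(2 + 4, 38 - 8) = 6    =>  decimal(14,6).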
- DecimalTypeInfo outputTypeInfo = null; + + final DecimalTypeInfo outputTypeInfo; if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { - int precision = Math.min(HiveDecimal.MAX_PRECISION, inputOI.precision() + 10); - outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, inputOI.scale()); + outputTypeInfo = getOutputDecimalTypeInfoForSum(inputOI.precision(), inputOI.scale(), mode); } else { + // No change. outputTypeInfo = (DecimalTypeInfo) inputOI.getTypeInfo(); } + ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(outputTypeInfo); outputOI = (PrimitiveObjectInspector) ObjectInspectorUtils.getStandardObjectInspector( oi, ObjectInspectorCopyOption.JAVA); @@ -227,6 +227,21 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc return oi; } + public static DecimalTypeInfo getOutputDecimalTypeInfoForSum(final int inputPrecision, + int inputScale, Mode mode) { + + // The output precision is 10 greater than the input which should cover at least + // 10b rows. The scale is the same as the input. + DecimalTypeInfo outputTypeInfo = null; + if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) { + int precision = Math.min(HiveDecimal.MAX_PRECISION, inputPrecision + 10); + outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(precision, inputScale); + } else { + outputTypeInfo = TypeInfoFactory.getDecimalTypeInfo(inputPrecision, inputScale); + } + return outputTypeInfo; + } + /** class for storing decimal sum value. */ @AggregationType(estimable = false) // hard to know exactly for decimals static class SumHiveDecimalWritableAgg extends SumAgg { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java index 0bc690f..1432bfb 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorGroupByOperator.java @@ -2157,7 +2157,7 @@ public void validate(String key, Object expected, Object result) { } else { assertEquals (true, arr[0] instanceof Object[]); Object[] vals = (Object[]) arr[0]; - assertEquals (2, vals.length); + assertEquals (3, vals.length); assertEquals (true, vals[0] instanceof LongWritable); LongWritable lw = (LongWritable) vals[0]; diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java index ede60b8..a3a8aa5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java +++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java @@ -156,8 +156,8 @@ public void testValidateNestedExpressions() { Vectorizer v = new Vectorizer(); v.testSetCurrentBaseWork(new MapWork()); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER)); - Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.PROJECTION)); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.FILTER, false)); + Assert.assertFalse(v.validateExprNodeDesc(andExprDesc, "test", VectorExpressionDescriptor.Mode.PROJECTION, false)); } /** @@ -230,8 +230,8 @@ public void testValidateSMBJoinOperator() { public void testExprNodeDynamicValue() { ExprNodeDesc exprNode = new ExprNodeDynamicValueDesc(new DynamicValue("id1", TypeInfoFactory.stringTypeInfo)); Vectorizer v = new 
Vectorizer(); - Assert.assertTrue(v.validateExprNodeDesc(exprNode, "Test", Mode.FILTER)); - Assert.assertTrue(v.validateExprNodeDesc(exprNode, "Test", Mode.PROJECTION)); + Assert.assertTrue(v.validateExprNodeDesc(exprNode, "Test", Mode.FILTER, false)); + Assert.assertTrue(v.validateExprNodeDesc(exprNode, "Test", Mode.PROJECTION, false)); } @Test @@ -254,7 +254,7 @@ public void testExprNodeBetweenWithDynamicValue() { Vectorizer v = new Vectorizer(); v.testSetCurrentBaseWork(new MapWork()); - boolean valid = v.validateExprNodeDesc(betweenExpr, "Test", Mode.FILTER); + boolean valid = v.validateExprNodeDesc(betweenExpr, "Test", Mode.FILTER, false); Assert.assertTrue(valid); } } diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q index 131a1af..02f7c75 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q @@ -12,6 +12,9 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=textfile; set hive.llap.io.enabled=false; +-- TEMPORARY UNTIL Vectorized Text Schema Evolution works. +set hive.vectorized.complex.types.enabled=false; + -- SORT_QUERY_RESULTS -- -- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Partitioned --> all complex conversions diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q index b4a9d66..d780074 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q @@ -12,6 +12,9 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=textfile; set hive.llap.io.enabled=false; +-- TEMPORARY UNTIL Vectorized Text Schema Evolution works. 
+set hive.vectorized.complex.types.enabled=false; + -- SORT_QUERY_RESULTS -- -- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Partitioned --> all complex conversions diff --git ql/src/test/queries/clientpositive/vector_aggregate_9.q ql/src/test/queries/clientpositive/vector_aggregate_9.q index 04fdeec..d7322ec 100644 --- ql/src/test/queries/clientpositive/vector_aggregate_9.q +++ ql/src/test/queries/clientpositive/vector_aggregate_9.q @@ -39,9 +39,19 @@ STORED AS ORC; INSERT INTO TABLE vectortab2korc SELECT * FROM vectortab2k; -explain vectorization expression +-- SORT_QUERY_RESULTS + +explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc; --- SORT_QUERY_RESULTS +select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc; + +explain vectorization detail +select min(d), max(d), sum(d), avg(d) from vectortab2korc; + +select min(d), max(d), sum(d), avg(d) from vectortab2korc; + +explain vectorization detail +select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc; -select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc; \ No newline at end of file +select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_complex_all.q ql/src/test/queries/clientpositive/vector_complex_all.q index b71ac62..920210f 100644 --- ql/src/test/queries/clientpositive/vector_complex_all.q +++ ql/src/test/queries/clientpositive/vector_complex_all.q @@ -6,6 +6,7 @@ set hive.fetch.task.conversion=none; SET hive.vectorized.execution.enabled=true; set hive.llap.io.enabled=false; set hive.mapred.mode=nonstrict; +set hive.auto.convert.join=true; CREATE TABLE orc_create_staging ( str STRING, @@ -32,35 +33,75 @@ SELECT orc_create_staging.*, '0' FROM orc_create_staging; set hive.llap.io.enabled=true; +EXPLAIN VECTORIZATION DETAIL SELECT * FROM orc_create_complex; +SELECT * FROM orc_create_complex; + +EXPLAIN VECTORIZATION DETAIL +SELECT str FROM orc_create_complex; + SELECT str FROM orc_create_complex; +EXPLAIN VECTORIZATION DETAIL SELECT strct, mp, lst FROM orc_create_complex; +SELECT strct, mp, lst FROM orc_create_complex; + +EXPLAIN VECTORIZATION DETAIL +SELECT lst, str FROM orc_create_complex; + SELECT lst, str FROM orc_create_complex; +EXPLAIN VECTORIZATION DETAIL +SELECT mp, str FROM orc_create_complex; + SELECT mp, str FROM orc_create_complex; +EXPLAIN VECTORIZATION DETAIL SELECT strct, str FROM orc_create_complex; +SELECT strct, str FROM orc_create_complex; + +EXPLAIN VECTORIZATION DETAIL +SELECT strct.B, str FROM orc_create_complex; + SELECT strct.B, str FROM orc_create_complex; set hive.llap.io.enabled=false; +EXPLAIN VECTORIZATION DETAIL INSERT INTO TABLE orc_create_complex SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2; +INSERT INTO TABLE orc_create_complex +SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2; + +EXPLAIN VECTORIZATION DETAIL +select count(*) from orc_create_complex; + select count(*) from orc_create_complex; set hive.llap.io.enabled=true; +EXPLAIN VECTORIZATION DETAIL +SELECT distinct lst, strct FROM orc_create_complex; + SELECT distinct lst, strct FROM orc_create_complex; +EXPLAIN VECTORIZATION DETAIL SELECT str, count(val) FROM orc_create_complex GROUP BY str; +SELECT str, count(val) FROM orc_create_complex GROUP BY str; + +EXPLAIN VECTORIZATION DETAIL 
+SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B; + SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B; +EXPLAIN VECTORIZATION DETAIL +SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str; + SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str; diff --git ql/src/test/queries/clientpositive/vector_groupby_reduce.q ql/src/test/queries/clientpositive/vector_groupby_reduce.q index f23b26f..7837c19 100644 --- ql/src/test/queries/clientpositive/vector_groupby_reduce.q +++ ql/src/test/queries/clientpositive/vector_groupby_reduce.q @@ -16,20 +16,20 @@ create table store_sales_txt ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) -row format delimited fields terminated by '|' +row format delimited fields terminated by '|' stored as textfile; LOAD DATA LOCAL INPATH '../../data/files/store_sales.txt' OVERWRITE INTO TABLE store_sales_txt; @@ -47,18 +47,19 @@ create table store_sales ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_wholesale_cost_decimal decimal(38,18), + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) stored as orc tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384"); @@ -79,6 +80,7 @@ ss_sold_date_sk , ss_ticket_number , ss_quantity , ss_wholesale_cost , + cast(ss_wholesale_cost as decimal(38,18)), ss_list_price , ss_sales_price , ss_ext_discount_amt , @@ -138,23 +140,25 @@ order by m; explain vectorization expression select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number; select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by 
ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number; @@ -162,10 +166,10 @@ order by ss_ticket_number; explain vectorization expression select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -173,13 +177,12 @@ group by ss_ticket_number, ss_item_sk order by ss_ticket_number, ss_item_sk; select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number, ss_item_sk -order by ss_ticket_number, ss_item_sk; - +order by ss_ticket_number, ss_item_sk; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_tablesample_rows.q ql/src/test/queries/clientpositive/vector_tablesample_rows.q index 94b2f5b..bb3c5a4 100644 --- ql/src/test/queries/clientpositive/vector_tablesample_rows.q +++ ql/src/test/queries/clientpositive/vector_tablesample_rows.q @@ -4,7 +4,7 @@ SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; set hive.mapred.mode=nonstrict; -explain vectorization expression +explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample (1 rows); select 'key1', 'value1' from alltypesorc tablesample (1 rows); @@ -12,7 +12,7 @@ select 'key1', 'value1' from alltypesorc tablesample (1 rows); create table decimal_2 (t decimal(18,9)) stored as orc; -explain vectorization expression +explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows); @@ -25,12 +25,12 @@ drop table decimal_2; -- Dummy tables HIVE-13190 -explain vectorization expression +explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y; select count(1) from (select * from (Select 1 a) x order by x.a) y; -explain vectorization expression +explain vectorization detail create temporary table dual as select 1; create temporary table dual as select 1; diff --git ql/src/test/queries/clientpositive/vector_udf1.q ql/src/test/queries/clientpositive/vector_udf1.q index 6ebe58f..48d3e1e 100644 --- ql/src/test/queries/clientpositive/vector_udf1.q +++ ql/src/test/queries/clientpositive/vector_udf1.q @@ -9,20 +9,20 @@ insert overwrite table varchar_udf_1 select key, value, key, value, '2015-01-14', '2015-01-14', '2017-01-11', '2017-01-11' from src where key = '238' limit 1; -- UDFs with varchar support -explain -select +explain vectorization detail +select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1; -select +select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select upper(c2), upper(c4), @@ -35,7 +35,7 @@ select upper(c2) = upper(c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select lower(c2), lower(c4), @@ -49,7 +49,7 @@ select from varchar_udf_1 limit 1; -- Scalar UDFs -explain +explain vectorization detail select ascii(c2), ascii(c4), @@ 
-62,20 +62,20 @@ select ascii(c2) = ascii(c4) from varchar_udf_1 limit 1; -explain -select +explain vectorization detail +select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1; -select +select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), @@ -88,7 +88,7 @@ select decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1; -explain +explain vectorization detail select instr(c2, '_'), instr(c4, '_'), @@ -101,7 +101,7 @@ select instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1; -explain +explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), @@ -114,7 +114,7 @@ select replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select reverse(c2), reverse(c4), @@ -127,7 +127,7 @@ select reverse(c2) = reverse(c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), @@ -140,7 +140,7 @@ select next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1 limit 1; -explain +explain vectorization detail select months_between(d1, d3), months_between(d2, d4), @@ -153,7 +153,7 @@ select months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select length(c2), length(c4), @@ -166,7 +166,7 @@ select length(c2) = length(c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), @@ -179,7 +179,7 @@ select locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1; -explain +explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), @@ -192,7 +192,7 @@ select lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1; -explain +explain vectorization detail select ltrim(c2), ltrim(c4), @@ -205,7 +205,7 @@ select ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select c2 regexp 'val', c4 regexp 'val', @@ -218,7 +218,7 @@ select (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1; -explain +explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), @@ -231,7 +231,7 @@ select regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1; -explain +explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), @@ -244,7 +244,7 @@ select regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1; -explain +explain vectorization detail select reverse(c2), reverse(c4), @@ -257,7 +257,7 @@ select reverse(c2) = reverse(c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), @@ -270,7 +270,7 @@ select rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1; -explain +explain vectorization detail select rtrim(c2), rtrim(c4), @@ -283,7 +283,7 @@ select rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1; -explain +explain vectorization detail select sentences('See spot run. 
See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) @@ -294,7 +294,7 @@ select sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1; -explain +explain vectorization detail select split(c2, '_'), split(c4, '_') @@ -305,18 +305,18 @@ select split(c4, '_') from varchar_udf_1 limit 1; -explain -select +explain vectorization detail +select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1; -select +select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1; -explain +explain vectorization detail select substr(c2, 1, 3), substr(c4, 1, 3), @@ -329,7 +329,7 @@ select substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1; -explain +explain vectorization detail select trim(c2), trim(c4), @@ -344,7 +344,7 @@ from varchar_udf_1 limit 1; -- Aggregate Functions -explain +explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) @@ -355,7 +355,7 @@ select compute_stats(c4, 16) from varchar_udf_1; -explain +explain vectorization detail select min(c2), min(c4) @@ -366,7 +366,7 @@ select min(c4) from varchar_udf_1; -explain +explain vectorization detail select max(c2), max(c4) diff --git ql/src/test/queries/clientpositive/vectorization_0.q ql/src/test/queries/clientpositive/vectorization_0.q index c97cd9f..3af9422 100644 --- ql/src/test/queries/clientpositive/vectorization_0.q +++ ql/src/test/queries/clientpositive/vectorization_0.q @@ -1,12 +1,13 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.reduce.enabled=true; set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -- Use ORDER BY clauses to generate 2 stages. 
-EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -21,7 +22,7 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1; @@ -55,7 +56,7 @@ SELECT FROM alltypesorc ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -70,7 +71,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1; @@ -104,7 +105,7 @@ SELECT FROM alltypesorc ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -119,7 +120,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1; @@ -153,7 +154,7 @@ SELECT FROM alltypesorc ORDER BY c1; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), diff --git ql/src/test/queries/clientpositive/vectorization_1.q ql/src/test/queries/clientpositive/vectorization_1.q index f71218f..b03ae71 100644 --- ql/src/test/queries/clientpositive/vectorization_1.q +++ ql/src/test/queries/clientpositive/vectorization_1.q @@ -1,8 +1,30 @@ SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +set hive.vectorized.execution.reduce.enabled=true; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT VAR_POP(ctinyint), + (VAR_POP(ctinyint) / -26.28), + SUM(cfloat), + (-1.389 + SUM(cfloat)), + (SUM(cfloat) * (-1.389 + SUM(cfloat))), + MAX(ctinyint), + (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))), + MAX(cint), + (MAX(cint) * 79.553), + VAR_SAMP(cdouble), + (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))), + COUNT(cint), + (-563 % MAX(cint)) +FROM alltypesorc +WHERE (((cdouble > ctinyint) + AND (cboolean2 > 0)) + OR ((cbigint < ctinyint) + OR ((cint > cbigint) + OR (cboolean1 < 0)))); + SELECT VAR_POP(ctinyint), (VAR_POP(ctinyint) / -26.28), SUM(cfloat), diff --git ql/src/test/queries/clientpositive/vectorization_10.q ql/src/test/queries/clientpositive/vectorization_10.q index c5f4d43..8b62068 100644 --- ql/src/test/queries/clientpositive/vectorization_10.q +++ ql/src/test/queries/clientpositive/vectorization_10.q @@ -3,6 +3,30 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT cdouble, + ctimestamp1, + ctinyint, + cboolean1, + cstring1, + (-(cdouble)), + (cdouble + csmallint), + ((cdouble + csmallint) % 33), + (-(cdouble)), + (ctinyint % cdouble), + (ctinyint % csmallint), + (-(cdouble)), + (cbigint * (ctinyint % csmallint)), + (9763215.5639 - (cdouble + csmallint)), + (-((-(cdouble)))) +FROM alltypesorc +WHERE (((cstring2 <= '10') + OR ((ctinyint > cdouble) + AND (-5638.15 >= ctinyint))) + OR ((cdouble > 6981) + AND ((csmallint = 9763215.5639) + OR (cstring1 LIKE '%a')))); + SELECT cdouble, ctimestamp1, ctinyint, diff --git ql/src/test/queries/clientpositive/vectorization_11.q ql/src/test/queries/clientpositive/vectorization_11.q index 3830ea9..aa05fe5 100644 --- ql/src/test/queries/clientpositive/vectorization_11.q +++ ql/src/test/queries/clientpositive/vectorization_11.q @@ -3,6 +3,21 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS 
+EXPLAIN VECTORIZATION DETAIL +SELECT cstring1, + cboolean1, + cdouble, + ctimestamp1, + (-3728 * csmallint), + (cdouble - 9763215.5639), + (-(cdouble)), + ((-(cdouble)) + 6981), + (cdouble * -5638.15) +FROM alltypesorc +WHERE ((cstring2 = cstring1) + OR ((ctimestamp1 IS NULL) + AND (cstring1 LIKE '%a'))); + SELECT cstring1, cboolean1, cdouble, diff --git ql/src/test/queries/clientpositive/vectorization_12.q ql/src/test/queries/clientpositive/vectorization_12.q index 0728ba9..18d9184 100644 --- ql/src/test/queries/clientpositive/vectorization_12.q +++ ql/src/test/queries/clientpositive/vectorization_12.q @@ -4,6 +4,38 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT cbigint, + cboolean1, + cstring1, + ctimestamp1, + cdouble, + (-6432 * cdouble), + (-(cbigint)), + COUNT(cbigint), + (cbigint * COUNT(cbigint)), + STDDEV_SAMP(cbigint), + ((-6432 * cdouble) / -6432), + (-(((-6432 * cdouble) / -6432))), + AVG(cdouble), + (-((-6432 * cdouble))), + (-5638.15 + cbigint), + SUM(cbigint), + (AVG(cdouble) / (-6432 * cdouble)), + AVG(cdouble), + (-((-(((-6432 * cdouble) / -6432))))), + (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))), + STDDEV_POP(cdouble) +FROM alltypesorc +WHERE (((ctimestamp1 IS NULL) + AND ((cboolean1 >= cboolean2) + OR (ctinyint != csmallint))) + AND ((cstring1 LIKE '%a') + OR ((cboolean2 <= 1) + AND (cbigint >= csmallint)))) +GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble +ORDER BY ctimestamp1, cdouble, cbigint, cstring1; + SELECT cbigint, cboolean1, cstring1, diff --git ql/src/test/queries/clientpositive/vectorization_13.q ql/src/test/queries/clientpositive/vectorization_13.q index 84ae994..7250a0c 100644 --- ql/src/test/queries/clientpositive/vectorization_13.q +++ ql/src/test/queries/clientpositive/vectorization_13.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, diff --git ql/src/test/queries/clientpositive/vectorization_14.q ql/src/test/queries/clientpositive/vectorization_14.q index 825fd63..2547500 100644 --- ql/src/test/queries/clientpositive/vectorization_14.q +++ ql/src/test/queries/clientpositive/vectorization_14.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cfloat, cstring1, diff --git ql/src/test/queries/clientpositive/vectorization_15.q ql/src/test/queries/clientpositive/vectorization_15.q index 5c48c58..bb33ffd 100644 --- ql/src/test/queries/clientpositive/vectorization_15.q +++ ql/src/test/queries/clientpositive/vectorization_15.q @@ -3,9 +3,12 @@ set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; set hive.fetch.task.conversion=none; +-- Until HIVE-16756: 'Vectorization: LongColModuloLongColumn throws "java.lang.ArithmeticException: / by zero"' is resolved, do not vectorize Reducers +set hive.vectorized.execution.reduce.enabled=false; + -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION +EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cboolean1, cdouble, diff --git ql/src/test/queries/clientpositive/vectorization_16.q ql/src/test/queries/clientpositive/vectorization_16.q index 822c824..e9cb5c3 100644 --- ql/src/test/queries/clientpositive/vectorization_16.q +++ ql/src/test/queries/clientpositive/vectorization_16.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION +EXPLAIN VECTORIZATION DETAIL SELECT 
cstring1, cdouble, ctimestamp1, diff --git ql/src/test/queries/clientpositive/vectorization_17.q ql/src/test/queries/clientpositive/vectorization_17.q index 57cdc41..4806232 100644 --- ql/src/test/queries/clientpositive/vectorization_17.q +++ ql/src/test/queries/clientpositive/vectorization_17.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION +EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cstring1, cint, diff --git ql/src/test/queries/clientpositive/vectorization_2.q ql/src/test/queries/clientpositive/vectorization_2.q index 4941d1e..5a1269c 100644 --- ql/src/test/queries/clientpositive/vectorization_2.q +++ ql/src/test/queries/clientpositive/vectorization_2.q @@ -3,6 +3,29 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT AVG(csmallint), + (AVG(csmallint) % -563), + (AVG(csmallint) + 762), + SUM(cfloat), + VAR_POP(cbigint), + (-(VAR_POP(cbigint))), + (SUM(cfloat) - AVG(csmallint)), + COUNT(*), + (-((SUM(cfloat) - AVG(csmallint)))), + (VAR_POP(cbigint) - 762), + MIN(ctinyint), + ((-(VAR_POP(cbigint))) + MIN(ctinyint)), + AVG(cdouble), + (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat)) +FROM alltypesorc +WHERE (((ctimestamp1 < ctimestamp2) + AND ((cstring2 LIKE 'b%') + AND (cfloat <= -5638.15))) + OR ((cdouble < ctinyint) + AND ((-10669 != ctimestamp2) + OR (359 > cint)))); + SELECT AVG(csmallint), (AVG(csmallint) % -563), (AVG(csmallint) + 762), diff --git ql/src/test/queries/clientpositive/vectorization_3.q ql/src/test/queries/clientpositive/vectorization_3.q index 2e0350a..dea7936 100644 --- ql/src/test/queries/clientpositive/vectorization_3.q +++ ql/src/test/queries/clientpositive/vectorization_3.q @@ -4,6 +4,31 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT STDDEV_SAMP(csmallint), + (STDDEV_SAMP(csmallint) - 10.175), + STDDEV_POP(ctinyint), + (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)), + (-(STDDEV_POP(ctinyint))), + (STDDEV_SAMP(csmallint) % 79.553), + (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))), + STDDEV_SAMP(cfloat), + (-(STDDEV_SAMP(csmallint))), + SUM(cfloat), + ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)), + (-((STDDEV_SAMP(csmallint) - 10.175))), + AVG(cint), + (-3728 - STDDEV_SAMP(csmallint)), + STDDEV_POP(cint), + (AVG(cint) / STDDEV_SAMP(cfloat)) +FROM alltypesorc +WHERE (((cint <= cfloat) + AND ((79.553 != cbigint) + AND (ctimestamp2 = -29071))) + OR ((cbigint > cdouble) + AND ((79.553 <= csmallint) + AND (ctimestamp1 > ctimestamp2)))); + SELECT STDDEV_SAMP(csmallint), (STDDEV_SAMP(csmallint) - 10.175), STDDEV_POP(ctinyint), diff --git ql/src/test/queries/clientpositive/vectorization_4.q ql/src/test/queries/clientpositive/vectorization_4.q index ba603c8..e7c88e6 100644 --- ql/src/test/queries/clientpositive/vectorization_4.q +++ ql/src/test/queries/clientpositive/vectorization_4.q @@ -3,6 +3,29 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cint), + (SUM(cint) * -563), + (-3728 + SUM(cint)), + STDDEV_POP(cdouble), + (-(STDDEV_POP(cdouble))), + AVG(cdouble), + ((SUM(cint) * -563) % SUM(cint)), + (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)), + VAR_POP(cdouble), + (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))), + ((-3728 + SUM(cint)) - (SUM(cint) * -563)), + MIN(ctinyint), + MIN(ctinyint), + (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / 
AVG(cdouble))))) +FROM alltypesorc +WHERE (((csmallint >= cint) + OR ((-89010 >= ctinyint) + AND (cdouble > 79.553))) + OR ((-563 != cbigint) + AND ((ctinyint != cbigint) + OR (-3728 >= cdouble)))); + SELECT SUM(cint), (SUM(cint) * -563), (-3728 + SUM(cint)), diff --git ql/src/test/queries/clientpositive/vectorization_5.q ql/src/test/queries/clientpositive/vectorization_5.q index e2d4d0a..c3b335a 100644 --- ql/src/test/queries/clientpositive/vectorization_5.q +++ ql/src/test/queries/clientpositive/vectorization_5.q @@ -3,6 +3,26 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT MAX(csmallint), + (MAX(csmallint) * -75), + COUNT(*), + ((MAX(csmallint) * -75) / COUNT(*)), + (6981 * MAX(csmallint)), + MIN(csmallint), + (-(MIN(csmallint))), + (197 % ((MAX(csmallint) * -75) / COUNT(*))), + SUM(cint), + MAX(ctinyint), + (-(MAX(ctinyint))), + ((-(MAX(ctinyint))) + MAX(ctinyint)) +FROM alltypesorc +WHERE (((cboolean2 IS NOT NULL) + AND (cstring1 LIKE '%b%')) + OR ((ctinyint = cdouble) + AND ((ctimestamp2 IS NOT NULL) + AND (cstring2 LIKE 'a')))); + SELECT MAX(csmallint), (MAX(csmallint) * -75), COUNT(*), diff --git ql/src/test/queries/clientpositive/vectorization_6.q ql/src/test/queries/clientpositive/vectorization_6.q index f55a2fb..f38b513 100644 --- ql/src/test/queries/clientpositive/vectorization_6.q +++ ql/src/test/queries/clientpositive/vectorization_6.q @@ -3,6 +3,27 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS +EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cfloat, + cstring1, + (988888 * csmallint), + (-(csmallint)), + (-(cfloat)), + (-26.28 / cfloat), + (cfloat * 359), + (cint % ctinyint), + (-(cdouble)), + (ctinyint - -75), + (762 * (cint % ctinyint)) +FROM alltypesorc +WHERE ((ctinyint != 0) + AND ((((cboolean1 <= 0) + AND (cboolean2 >= cboolean1)) + OR ((cbigint IS NOT NULL) + AND ((cstring2 LIKE '%a') + OR (cfloat <= -257)))))); + SELECT cboolean1, cfloat, cstring1, diff --git ql/src/test/queries/clientpositive/vectorization_7.q ql/src/test/queries/clientpositive/vectorization_7.q index bf3a1c2..a85585b 100644 --- ql/src/test/queries/clientpositive/vectorization_7.q +++ ql/src/test/queries/clientpositive/vectorization_7.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, diff --git ql/src/test/queries/clientpositive/vectorization_8.q ql/src/test/queries/clientpositive/vectorization_8.q index d43db26..3aa0eb5 100644 --- ql/src/test/queries/clientpositive/vectorization_8.q +++ ql/src/test/queries/clientpositive/vectorization_8.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, diff --git ql/src/test/queries/clientpositive/vectorization_9.q ql/src/test/queries/clientpositive/vectorization_9.q index 822c824..e9cb5c3 100644 --- ql/src/test/queries/clientpositive/vectorization_9.q +++ ql/src/test/queries/clientpositive/vectorization_9.q @@ -5,7 +5,7 @@ set hive.fetch.task.conversion=none; -- SORT_QUERY_RESULTS -EXPLAIN VECTORIZATION +EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, diff --git ql/src/test/queries/clientpositive/vectorized_distinct_gby.q ql/src/test/queries/clientpositive/vectorized_distinct_gby.q index 4339a5f..44a06f5 100644 --- ql/src/test/queries/clientpositive/vectorized_distinct_gby.q +++ 
ql/src/test/queries/clientpositive/vectorized_distinct_gby.q @@ -1,6 +1,7 @@ set hive.mapred.mode=nonstrict; set hive.explain.user=false; SET hive.vectorized.execution.enabled=true; +SET hive.vectorized.execution.reduce.enabled=true; set hive.fetch.task.conversion=none; SET hive.map.groupby.sorted=true; @@ -8,8 +9,10 @@ SET hive.map.groupby.sorted=true; create table dtest(a int, b int) clustered by (a) sorted by (a) into 1 buckets stored as orc; insert into table dtest select c,b from (select array(300,300,300,300,300) as a, 1 as b from src order by a limit 1) y lateral view explode(a) t1 as c; -explain vectorization select sum(distinct a), count(distinct a) from dtest; +explain vectorization detail +select sum(distinct a), count(distinct a) from dtest; select sum(distinct a), count(distinct a) from dtest; -explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc; +explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc; select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc; diff --git ql/src/test/queries/clientpositive/vectorized_timestamp.q ql/src/test/queries/clientpositive/vectorized_timestamp.q index c110597..8de4e86 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp.q @@ -1,38 +1,41 @@ set hive.fetch.task.conversion=none; -set hive.explain.user=true; +set hive.explain.user=false; +set hive.vectorized.execution.reduce.enabled=true; DROP TABLE IF EXISTS test; CREATE TABLE test(ts TIMESTAMP) STORED AS ORC; INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000'), ('9999-12-31 23:59:59.999999999'); SET hive.vectorized.execution.enabled = false; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN VECTORIZATION EXPRESSION SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; -SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; - -EXPLAIN VECTORIZATION EXPRESSION -SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000'); - SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000'); SET hive.vectorized.execution.enabled = true; -EXPLAIN VECTORIZATION EXPRESSION -SELECT ts FROM test; SELECT ts FROM test; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test; -EXPLAIN VECTORIZATION EXPRESSION +EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000'); SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000'); + +EXPLAIN VECTORIZATION DETAIL +SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test; + +SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test; + +EXPLAIN VECTORIZATION DETAIL +SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test; + +SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test; diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 
d26400d..0e9e120 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -72,8 +72,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -153,8 +155,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -250,8 +254,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -331,8 +337,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out index d0cafaa..f57481d 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out @@ -156,24 +156,42 @@ STAGE PLANS: TableScan alias: part_change_various_various_struct1 Statistics: Num rows: 6 Data size: 4734 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, s1:struct, b:string + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator @@ -438,24 +456,42 @@ STAGE PLANS: TableScan alias: part_add_various_various_struct2 Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: 
PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, b:string, s2:struct + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator @@ -648,24 +684,42 @@ STAGE PLANS: TableScan alias: part_add_to_various_various_struct4 Statistics: Num rows: 4 Data size: 1172 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, b:string, s3:struct + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out index 97270fc..0ea2972 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out @@ -172,7 
+172,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false vectorized: false Stage: Stage-0 @@ -454,7 +454,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false vectorized: false Stage: Stage-0 @@ -664,7 +664,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false vectorized: false Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out index b35bccc..70be462 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out @@ -172,7 +172,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s1] not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false vectorized: false Stage: Stage-0 @@ -454,7 +454,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s2] not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false vectorized: false Stage: Stage-0 @@ -664,7 +664,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type struct of Column[s3] not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false vectorized: false Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out index 6a0f490..e63cbf8 100644 --- 
ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out +++ ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out @@ -982,9 +982,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 5) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash @@ -1025,9 +1027,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: string) mode: mergepartial @@ -1107,9 +1111,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 5) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash @@ -1150,9 +1156,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 8a23d6a..cf3dc23 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -101,10 +101,10 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ] POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -142,17 +142,24 @@ STAGE PLANS: Group By Operator aggregations: min(dc), max(dc), sum(dc), avg(dc) Group By Vectorization: - aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct + aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized, llap @@ -160,26 +167,50 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 13 + includeColumns: [6] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date + partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,18), VALUE._col2:decimal(38,18), VALUE._col3:struct + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDecimal(col 0) -> decimal(38,18), VectorUDAFMaxDecimal(col 1) -> decimal(38,18), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 3) -> decimal(38,18) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -201,3 +232,265 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@vectortab2korc #### A masked pattern was here #### -4997414117561.546875000000000000 4994550248722.298828000000000000 -10252745435816.024410000000000000 -5399023399.587163986308583465 +PREHOOK: query: explain vectorization detail +select min(d), max(d), sum(d), avg(d) from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select min(d), max(d), sum(d), avg(d) from vectortab2korc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + 
+STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: d (type: double) + outputColumnNames: d + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5] + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(d), max(d), sum(d), avg(d) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 5) -> double, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFSumDouble(col 5) -> double, VectorUDAFAvgDouble(col 5) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 13 + includeColumns: [5] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:double, VALUE._col3:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinDouble(col 0) -> double, VectorUDAFMaxDouble(col 1) -> double, VectorUDAFSumDouble(col 2) -> double, VectorUDAFAvgFinal(col 3) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: 
[0, 1, 2, 3] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select min(d), max(d), sum(d), avg(d) from vectortab2korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +POSTHOOK: query: select min(d), max(d), sum(d), avg(d) from vectortab2korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +-4999829.07 4997627.14 -1.7516847286999977E8 -92193.93308947356 +PREHOOK: query: explain vectorization detail +select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: vectortab2korc + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10] + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ts), max(ts), sum(ts), avg(ts) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 10) -> timestamp, VectorUDAFMaxTimestamp(col 10) -> timestamp, VectorUDAFSumTimestamp(col 10) -> double, VectorUDAFAvgTimestamp(col 10) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 13 + includeColumns: [10] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:timestamp, VALUE._col1:timestamp, VALUE._col2:double, VALUE._col3:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFSumDouble(col 2) -> double, VectorUDAFAvgFinal(col 3) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +POSTHOOK: query: select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +2013-02-18 21:06:48 2081-02-22 01:21:53 4.591384881081E12 2.4254542425150557E9 diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index a98c34f..def04a8 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -97,8 +97,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -123,8 +125,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -231,8 +235,10 @@ STAGE PLANS: Statistics: Num rows: 11 Data 
size: 1023 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: int) mode: hash @@ -257,9 +263,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -276,8 +284,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -305,8 +315,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -439,8 +451,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: int) mode: hash @@ -487,8 +501,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: int) mode: hash @@ -516,9 +532,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: int) mode: mergepartial @@ -571,9 +589,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: int) mode: mergepartial @@ -703,8 +723,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -729,8 +751,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -844,8 +868,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -870,8 +896,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1009,8 +1037,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: 
false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1035,8 +1065,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1162,8 +1194,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1188,8 +1222,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1360,8 +1396,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1385,8 +1423,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1486,8 +1526,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1512,8 +1554,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1632,8 +1676,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1658,8 +1704,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1781,8 +1829,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1807,8 +1857,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -2175,8 +2227,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: int) mode: hash @@ -2204,9 +2258,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint 
className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_between_in.q.out ql/src/test/results/clientpositive/llap/vector_between_in.q.out index 18dd1c6..80c3060 100644 --- ql/src/test/results/clientpositive/llap/vector_between_in.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_in.q.out @@ -154,8 +154,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -193,8 +195,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -358,8 +362,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -397,8 +403,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -754,8 +762,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -793,8 +803,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1104,9 +1116,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1147,9 +1161,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial @@ -1242,9 +1258,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1285,9 +1303,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 
native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial @@ -1380,9 +1400,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1423,9 +1445,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial @@ -1518,9 +1542,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1561,9 +1587,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 160a43b..9f059b9 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -174,8 +174,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 21) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -256,8 +258,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -368,9 +372,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 10 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: bin (type: binary) mode: hash @@ -411,9 +417,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: binary) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index f06d49a..2b8aaaa 100644 --- ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -145,13 +145,14 @@ STAGE PLANS: Group By Operator aggregations: avg(50), avg(50.0), avg(50) Group By Vectorization: - aggregators: 
VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -160,6 +161,10 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) @@ -168,21 +173,32 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 1) -> double, VectorUDAFAvgFinal(col 2) -> double, VectorUDAFAvgDecimalFinal(col 3) -> decimal(16,4) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -190,6 +206,10 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) diff --git ql/src/test/results/clientpositive/llap/vector_char_2.q.out ql/src/test/results/clientpositive/llap/vector_char_2.q.out index 65fafb0..48c6281 100644 --- ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -98,9 +98,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash @@ -142,9 +144,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0, 1] keys: KEY._col0 (type: char(20)) mode: mergepartial @@ -294,9 +298,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash @@ -338,9 +344,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0, 1] keys: KEY._col0 (type: char(20)) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index d57d39f..55a1d42 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -54,8 +54,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: string) mode: hash @@ -75,8 +77,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial @@ -226,9 +230,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 4) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash @@ -269,9 +275,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: 
[0] keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index 1107f82..4503cc4 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -66,6 +66,74 @@ POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] POSTHOOK: Lineage: orc_create_complex.val SIMPLE [] orc_create_staging.str orc_create_staging.mp orc_create_staging.lst orc_create_staging.strct _c1 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT * FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: str (type: string), mp (type: map), lst (type: array), strct (type: struct), val (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1, 2, 3, 4] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -75,9 +143,77 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### orc_create_complex.str orc_create_complex.mp orc_create_complex.lst orc_create_complex.strct orc_create_complex.val -line1 {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] {"a":"one","b":"two"} 0 +line1 {"key11":"value11","key12":"value12","key13":"value13"} ["a","b","c"] {"a":"one","b":"two"} 0 line2 {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"three","b":"four"} 0 line3 
{"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] {"a":"five","b":"six"} 0 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT str FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT str FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: str (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT str FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -90,6 +226,74 @@ str line1 line2 line3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct, mp, lst FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct, mp, lst FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: strct (type: struct), mp (type: map), lst (type: array) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 2] + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [1, 2, 3] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT strct, mp, lst FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -99,9 +303,77 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### strct mp lst -{"a":"one","b":"two"} {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] +{"a":"one","b":"two"} {"key11":"value11","key12":"value12","key13":"value13"} ["a","b","c"] {"a":"three","b":"four"} {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"five","b":"six"} {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT lst, str FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT lst, str FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: lst (type: array), str (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 0] + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 2] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT lst, str FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -114,6 
+386,74 @@ lst str ["a","b","c"] line1 ["d","e","f"] line2 ["g","h","i"] line3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT mp, str FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT mp, str FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: mp (type: map), str (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0] + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 1] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT mp, str FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -123,9 +463,77 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### mp str -{"key13":"value13","key11":"value11","key12":"value12"} line1 +{"key11":"value11","key12":"value12","key13":"value13"} line1 {"key21":"value21","key22":"value22","key23":"value23"} line2 {"key31":"value31","key32":"value32","key33":"value33"} line3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct, str FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct, str FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: strct (type: struct), str (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: 
[3, 0] + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 3] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT strct, str FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -138,6 +546,58 @@ strct str {"a":"one","b":"two"} line1 {"a":"three","b":"four"} line2 {"a":"five","b":"six"} line3 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct.B, str FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct.B, str FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Pruned Column Paths: strct.b + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: strct.b (type: string), str (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[strct].b + vectorized: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT strct.B, str FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -150,7 +610,237 @@ b str two line1 four line2 six line3 -Warning: Shuffle Join MERGEJOIN[15][tables = [$hdt$_1, $hdt$_2, $hdt$_3, $hdt$_0]] in Stage 'Reducer 2' is a cross product +Warning: Map Join MAPJOIN[15][bigTable=?] 
in task 'Map 4' is a cross product +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +INSERT INTO TABLE orc_create_complex +SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +INSERT INTO TABLE orc_create_complex +SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 4 <- Map 1 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_staging + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + expressions: str (type: string), mp (type: map), lst (type: array), strct (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: str:string, mp:map, lst:array, strct:struct + partitionColumnCount: 0 + Map 2 + Map Operator Tree: + TableScan + alias: spam2 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 
[] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [] + dataColumns: str:string, mp:map, lst:array, strct:struct + partitionColumnCount: 0 + Map 3 + Map Operator Tree: + TableScan + alias: spam1 + Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [] + dataColumns: str:string, mp:map, lst:array, strct:struct + partitionColumnCount: 0 + Map 4 + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + keys: + 0 + 1 + 2 + 3 + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: One MapJoin Condition IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col6 + input vertices: + 0 Map 1 + 1 Map 2 + 2 Map 3 + Statistics: Num rows: 500 Data size: 143000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 500 Data size: 143000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
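
The reduce-sink operator names in these plans follow the key shape: an empty sort key gets VectorReduceSinkEmptyKeyOperator (the cross-join maps just above), a single integer-family key that is also the partition column gets VectorReduceSinkLongOperator, a single string key VectorReduceSinkStringOperator, and ordered or multi-column key shapes the generic VectorReduceSinkObjectHashOperator (as in the order-by sink earlier in this diff). A minimal sketch of that dispatch follows; chooseReduceSinkClass is a hypothetical helper, not Hive's actual selection code, which additionally verifies the native conditions printed in each plan (BinarySortableSerDe keys, LazyBinarySerDe values, no PTF TopN, no DISTINCT, Tez or Spark engine).

    import java.util.List;

    // Illustrative only: mirrors the operator class names printed in the plans above.
    public class ReduceSinkDispatchSketch {
      static String chooseReduceSinkClass(List<String> partitionKeyTypeNames) {
        if (partitionKeyTypeNames.isEmpty()) {
          // e.g. cross joins and keyless global aggregates
          return "VectorReduceSinkEmptyKeyOperator";
        }
        if (partitionKeyTypeNames.size() == 1) {
          switch (partitionKeyTypeNames.get(0)) {
            case "tinyint": case "smallint": case "int": case "bigint":
              return "VectorReduceSinkLongOperator";
            case "string":
              return "VectorReduceSinkStringOperator";
          }
        }
        // Multi-column keys and sort-only (order-by) outputs use the generic variant.
        return "VectorReduceSinkObjectHashOperator";
      }
    }
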
compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 500 Data size: 143000 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: string, map, array, struct + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: false + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.orc_create_complex + + Stage: Stage-3 + Stats-Aggr Operator + +Warning: Map Join MAPJOIN[15][bigTable=?] in task 'Map 4' is a cross product PREHOOK: query: INSERT INTO TABLE orc_create_complex SELECT orc_create_staging.*, src1.key FROM orc_create_staging cross join src src1 cross join orc_create_staging spam1 cross join orc_create_staging spam2 PREHOOK: type: QUERY @@ -169,6 +859,126 @@ POSTHOOK: Lineage: orc_create_complex.str SIMPLE [(orc_create_staging)orc_create POSTHOOK: Lineage: orc_create_complex.strct SIMPLE [(orc_create_staging)orc_create_staging.FieldSchema(name:strct, type:struct, comment:null), ] POSTHOOK: Lineage: orc_create_complex.val SIMPLE [(src)src1.FieldSchema(name:key, type:string, comment:default), ] orc_create_staging.str orc_create_staging.mp orc_create_staging.lst orc_create_staging.strct src1.key +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(*) from orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +select count(*) from orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [] + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + Group By Vectorization: + aggregators: VectorUDAFCountStar(*) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink 
Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: select count(*) from orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -179,6 +989,83 @@ POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### _c0 13503 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT distinct lst, strct FROM orc_create_complex +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT distinct lst, strct FROM orc_create_complex +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: lst (type: array), strct (type: struct) + outputColumnNames: lst, strct + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + keys: lst (type: array), strct (type: struct) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array), _col1 (type: struct) + sort order: ++ + Map-reduce partition columns: _col0 (type: array), _col1 (type: struct) + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type LIST not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: array), KEY._col1 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT distinct lst, strct FROM orc_create_complex PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -191,6 +1078,135 @@ lst strct ["a","b","c"] {"a":"one","b":"two"} ["d","e","f"] {"a":"three","b":"four"} ["g","h","i"] {"a":"five","b":"six"} +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT str, count(val) FROM orc_create_complex GROUP BY str +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT str, count(val) FROM orc_create_complex GROUP BY str +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4] + Select Operator + expressions: str (type: string), val (type: string) + outputColumnNames: str, val + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 4] + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(val) + Group By Vectorization: + aggregators: VectorUDAFCount(col 4) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: HASH + 
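
Two new fields appear in every Group By Vectorization block in this patch: groupByMode echoes the logical group-by mode, and vectorProcessingMode records how VectorGroupByOperator actually processes batches. From the plans in this file: HASH does map-side hash aggregation and emits partial results, MERGE_PARTIAL merges those partials on the reducer when grouping keys are present, GLOBAL merges a single keyless aggregate (the count(*) plan above), and NONE marks a group-by the vectorizer left in row mode (vectorOutput: false). A rough sketch of the mapping these plans imply; the enum and helper are illustrative, not the patch's actual code:

    // Illustrative mapping from the explain-plan fields to a processing mode.
    public class GroupByModeSketch {
      enum ProcessingMode { NONE, HASH, MERGE_PARTIAL, GLOBAL }

      static ProcessingMode processingMode(String groupByMode, boolean hasKeys,
          boolean vectorOutput) {
        if (!vectorOutput) {
          return ProcessingMode.NONE;                      // vectorizer fell back to row mode
        }
        switch (groupByMode) {
          case "HASH":
            return ProcessingMode.HASH;                    // map side, partials out
          case "MERGEPARTIAL":
            return hasKeys ? ProcessingMode.MERGE_PARTIAL  // reducer with grouping keys
                           : ProcessingMode.GLOBAL;        // keyless global aggregate
          default:
            return ProcessingMode.NONE;
        }
      }
    }
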
projectedOutputColumns: [0] + keys: str (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: [0] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [1] + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + includeColumns: [0, 4] + dataColumns: str:string, mp:map, lst:array, strct:struct, val:string + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:string, VALUE._col0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT str, count(val) FROM orc_create_complex GROUP BY str PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -203,6 +1219,107 @@ str _c1 line3 4501 line1 4501 line2 4501 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 
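
The next plan, grouping by strct.B, marks the exact boundary of this patch: the reducer vectorizes, since by then the key is an ordinary string, but the map side reports notVectorizedReason "Could not vectorize expression (mode = PROJECTION): Column[strct].b". Whole complex columns can now be projected, yet extracting a field out of a struct still has no vector expression. A sketch of that distinction under those assumptions; canVectorizeProjection is a hypothetical helper, not the Vectorizer's real check:

    import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
    import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;

    public class ProjectionSketch {
      // SELECT strct    -> bare column reference, vectorizes (complex types ride along)
      // SELECT strct.b  -> field access below the column, falls back to row mode
      static boolean canVectorizeProjection(ExprNodeDesc expr) {
        if (expr instanceof ExprNodeColumnDesc) {
          return true;
        }
        if (expr instanceof ExprNodeFieldDesc) {
          return false;  // struct field extraction: no vector expression yet
        }
        return false;    // anything else would need a real vector-expression lookup
      }
    }
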
+ Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Pruned Column Paths: strct.b + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: strct.b (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Select expression for SELECT operator: Could not vectorize expression (mode = PROJECTION): Column[strct].b + vectorized: false + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:string, VALUE._col0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0] + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT strct.B, count(val) FROM orc_create_complex GROUP BY strct.B PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex @@ -215,6 +1332,90 @@ strct.b _c1 six 4501 two 4501 four 4501 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE 
DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orc_create_complex + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: str (type: string), mp (type: map), lst (type: array), strct (type: struct), val (type: string) + outputColumnNames: str, mp, lst, strct, val + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(val) + keys: str (type: string), mp (type: map), lst (type: array), strct (type: struct) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) + Statistics: Num rows: 13503 Data size: 15460932 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint) + Execution mode: llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type MAP not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing complex type MAP not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: map), KEY._col2 (type: array), KEY._col3 (type: struct) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col1 (type: map), _col2 (type: array), _col0 (type: string), _col4 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6751 Data size: 7729893 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT strct, mp, lst, str, count(val) FROM orc_create_complex GROUP BY strct, mp, lst, str PREHOOK: type: QUERY PREHOOK: Input: default@orc_create_complex diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 5ea4b0f..e389cd3 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -45,12 +45,23 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 9173 Data size: 2309110 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 9173 Data size: 2309110 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -58,51 +69,77 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 input vertices: 1 Map 2 Statistics: Num rows: 10090 Data size: 2540021 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 10090 Data size: 2540021 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan alias: test Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: a is not null (type: boolean) Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: a (type: int), b (type: map) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
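
This join now vectorizes even though the small table's value column b is a map<int,string>: the map is never the join key, so it only has to travel through the batch as a payload column. In the vectorized representation, complex values live in nested column vectors, with per-row offsets and lengths pointing into flattened child vectors. Below is a self-contained sketch of that layout, using the standard storage-api classes rather than anything specific to this patch:

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.MapColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class MapColumnSketch {
      public static void main(String[] args) {
        int n = VectorizedRowBatch.DEFAULT_SIZE;

        // Mirrors the small table: column 0 is a (int), column 1 is b (map<int,string>).
        LongColumnVector a = new LongColumnVector(n);
        LongColumnVector mapKeys = new LongColumnVector(n);
        BytesColumnVector mapValues = new BytesColumnVector(n);
        mapValues.initBuffer();
        MapColumnVector b = new MapColumnVector(n, mapKeys, mapValues);

        VectorizedRowBatch batch = new VectorizedRowBatch(2);
        batch.cols[0] = a;
        batch.cols[1] = b;

        // Row 0: a = 2, b = {1 -> "one", 2 -> "two"}. The map column stores only
        // the slice of the child vectors that belongs to this row.
        a.vector[0] = 2;
        b.offsets[0] = 0;
        b.lengths[0] = 2;
        mapKeys.vector[0] = 1;
        mapValues.setVal(0, "one".getBytes());
        mapKeys.vector[1] = 2;
        mapValues.setVal(1, "two".getBytes());
        b.childCount = 2;
        batch.size = 1;
      }
    }

Operators such as VectorSelectOperator and the native map join only move or reference these vectors; they never interpret the map entries themselves, which is why pass-through support for complex types is enough to keep this whole task vectorized.
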
projectedOutputColumns: [0, 1] Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: map) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type map of Column[b] not supported - vectorized: false + allNative: true + usesVectorUDFAdaptor: false + vectorized: true Stage: Stage-0 Fetch Operator @@ -184,7 +221,14 @@ STAGE PLANS: TableScan alias: test2b Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: a is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -193,6 +237,10 @@ STAGE PLANS: keys: 0 a (type: int) 1 a[1] (type: int) + Map Join Vectorization: + className: VectorMapJoinInnerLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true outputColumnNames: _col0, _col4 input vertices: 1 Map 2 @@ -200,22 +248,31 @@ STAGE PLANS: Select Operator expressions: _col0 (type: int), _col4 (type: array) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Small Table expression for MAPJOIN operator: Data type array of Column[a] not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Map 2 Map Operator Tree: TableScan @@ -236,7 +293,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Data type array of Column[a] not supported + notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: Unexpected hive type name array vectorized: false Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_count.q.out ql/src/test/results/clientpositive/llap/vector_count.q.out index 5fa5a82..21d92cd 100644 --- ql/src/test/results/clientpositive/llap/vector_count.q.out +++ ql/src/test/results/clientpositive/llap/vector_count.q.out @@ -84,9 +84,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash @@ -194,9 +196,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, col 2, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index b9d0f06..d45a15f 100644 --- ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1267,9 +1267,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 16 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ws_order_number (type: int) mode: hash @@ -1307,9 +1309,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -1320,8 +1324,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -1349,8 +1355,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index ab38382..8ea03f7 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -71,9 +71,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) mode: hash @@ -114,9 +116,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: KEY._col0 (type: int) mode: mergepartial @@ -229,13 +233,14 @@ STAGE PLANS: Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) 
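
The new `groupByMode` / `vectorProcessingMode` pairs in these golden files follow a consistent pattern: hash maps to HASH, mergepartial maps to MERGE_PARTIAL when there are grouping keys but to GLOBAL when there are none, and partial1/final/complete map to STREAMING. A small sketch of that mapping, as read off the diffs (the helper itself is hypothetical):

```java
// Mode-to-processing-mode mapping exhibited by the .q.out diffs; not Hive code.
public class GroupByProcessingModes {
    enum GroupByMode { HASH, PARTIAL1, FINAL, COMPLETE, MERGEPARTIAL }
    enum ProcessingMode { HASH, MERGE_PARTIAL, STREAMING, GLOBAL }

    static ProcessingMode pick(GroupByMode mode, boolean hasKeys) {
        switch (mode) {
            case HASH:
                return ProcessingMode.HASH;
            case MERGEPARTIAL:
                // keyless mergepartial is a single global aggregate
                return hasKeys ? ProcessingMode.MERGE_PARTIAL : ProcessingMode.GLOBAL;
            default:
                // partial1 / final / complete see reduce-side sorted keys,
                // so they can stream one group at a time.
                return ProcessingMode.STREAMING;
        }
    }

    public static void main(String[] args) {
        System.out.println(pick(GroupByMode.MERGEPARTIAL, true));  // MERGE_PARTIAL
        System.out.println(pick(GroupByMode.MERGEPARTIAL, false)); // GLOBAL
        System.out.println(pick(GroupByMode.FINAL, true));         // STREAMING
    }
}
```
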
-> bigint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -244,6 +249,10 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap @@ -251,34 +260,56 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 5) -> decimal(34,14), VectorUDAFStdPopFinal(col 
6) -> double, VectorUDAFStdSampFinal(col 7) -> double, VectorUDAFCountMerge(col 8) -> bigint, VectorUDAFMaxDecimal(col 9) -> decimal(23,14), VectorUDAFMinDecimal(col 10) -> decimal(23,14), VectorUDAFSumDecimal(col 11) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 12) -> decimal(37,18), VectorUDAFStdPopFinal(col 13) -> double, VectorUDAFStdSampFinal(col 14) -> double, VectorUDAFCountMerge(col 15) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 15, val 1) -> boolean predicate: (_col15 > 1) (type: boolean) Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 5d62086..3f32eb2 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -584,17 +584,22 @@ STAGE PLANS: Group By Operator aggregations: avg(dec), sum(dec) Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) + aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
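
The vector_decimal_aggregate diffs above capture the two-phase shape of AVG that this change enables end to end: map-side partials carry a struct-like (count, sum) pair, and the reduce side's "Final" evaluator (e.g. VectorUDAFAvgDecimalFinal) divides once at the end. A minimal sketch of that flow, with illustrative scale/rounding choices rather than Hive's exact decimal(34,14) rules:

```java
import java.math.BigDecimal;
import java.math.RoundingMode;

// Two-phase AVG sketch: accumulate partials, merge, then finalize once.
public class TwoPhaseDecimalAvg {
    static final class Partial {
        long count;
        BigDecimal sum = BigDecimal.ZERO;

        void add(BigDecimal v) { count++; sum = sum.add(v); }

        void merge(Partial other) { count += other.count; sum = sum.add(other.sum); }
    }

    static BigDecimal finish(Partial p, int resultScale) {
        if (p.count == 0) return null; // AVG over no rows is NULL
        return p.sum.divide(BigDecimal.valueOf(p.count), resultScale, RoundingMode.HALF_UP);
    }

    public static void main(String[] args) {
        Partial mapper1 = new Partial();
        mapper1.add(new BigDecimal("10.5"));
        Partial mapper2 = new Partial();
        mapper2.add(new BigDecimal("20.5"));
        mapper1.merge(mapper2);                  // reduce-side merge of partials
        System.out.println(finish(mapper1, 14)); // 15.50000000000000
    }
}
```
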
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap @@ -602,26 +607,39 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFAvgDecimalFinal(col 0) -> decimal(34,14), VectorUDAFSumDecimal(col 1) -> decimal(38,18) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index c271b82..56127a5 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -1666,7 +1666,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2) @@ -2338,7 +2338,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: stddev(VALUE._col0), variance(VALUE._col1) @@ -2425,7 +2425,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) diff --git ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out index f17583f..b7f6a80 100644 --- ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_distinct_2.q.out @@ -142,9 +142,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 8 native: false + 
vectorProcessingMode: HASH projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash @@ -182,9 +184,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_empty_where.q.out ql/src/test/results/clientpositive/llap/vector_empty_where.q.out index f2bc0a5..b250332 100644 --- ql/src/test/results/clientpositive/llap/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/llap/vector_empty_where.q.out @@ -47,9 +47,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -87,9 +89,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -100,8 +104,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -129,8 +135,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -202,9 +210,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -242,9 +252,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -255,8 +267,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -284,8 +298,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -365,9 +381,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -405,9 +423,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + 
vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -418,8 +438,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -447,8 +469,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -528,9 +552,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -568,9 +594,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -581,8 +609,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -610,8 +640,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index ffeab2c..4399554 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -94,9 +94,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: PARTIAL1 vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: partial1 @@ -124,9 +126,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: final diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 5bfa9b5..a91b715 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -94,9 +94,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: PARTIAL1 vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: partial1 @@ -124,9 +126,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: STREAMING 
projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: final diff --git ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out index 0242cbd..cd9ff27 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_3.q.out @@ -144,9 +144,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 8 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash @@ -187,9 +189,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 2) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index a4ef2e7..22a71e4 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -79,9 +79,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, ConstantVectorExpression(val 0) -> 2:long native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: key (type: int), value (type: int), 0 (type: int) mode: hash @@ -145,9 +147,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1, ConstantVectorExpression(val 1) -> 4:long native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int), 1 (type: int) mode: mergepartial @@ -253,9 +257,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, ConstantVectorExpression(val 0) -> 2:long native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: int), _col1 (type: int), 0 (type: int) mode: hash @@ -310,9 +316,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1, col 2 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out index b30aabd..9596415 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -79,7 +79,7 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator 
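
The STREAMING processing mode that vector_groupby4/6 now report for partial1 and final group-bys reflects that reduce-side rows arrive sorted by key, so the operator only needs the current group's state and can flush on a key change instead of hashing every group. An illustrative sketch of that idea:

```java
import java.util.Arrays;
import java.util.List;

// Streaming group-by over key-sorted input: one group's state at a time.
public class StreamingGroupBy {
    public static void main(String[] args) {
        // (key, value) pairs already sorted by key, as a reducer would see them
        List<String[]> rows = Arrays.asList(
            new String[]{"a", "1"}, new String[]{"a", "2"}, new String[]{"b", "5"});

        String currentKey = null;
        long sum = 0;
        for (String[] row : rows) {
            if (currentKey != null && !currentKey.equals(row[0])) {
                System.out.println(currentKey + " -> " + sum); // flush finished group
                sum = 0;
            }
            currentKey = row[0];
            sum += Long.parseLong(row[1]);
        }
        if (currentKey != null) {
            System.out.println(currentKey + " -> " + sum); // flush the last group
        }
        // prints: a -> 3, then b -> 5
    }
}
```
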
aggregations: avg(VALUE._col0), count(VALUE._col1) @@ -149,7 +149,7 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) @@ -245,7 +245,7 @@ STAGE PLANS: Execution mode: llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) @@ -260,7 +260,7 @@ STAGE PLANS: Statistics: Num rows: 48 Data size: 12240 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), count(VALUE._col1) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index bfa87bb..16b716c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -131,8 +131,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -148,9 +150,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: key (type: string) mode: hash @@ -218,8 +222,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 @@ -245,9 +251,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index 39e81f3..c000b7c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -11,20 +11,20 @@ PREHOOK: query: create table store_sales_txt ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) -row format delimited fields terminated by '|' 
+row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -42,20 +42,20 @@ POSTHOOK: query: create table store_sales_txt ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) -row format delimited fields terminated by '|' +row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -81,18 +81,19 @@ PREHOOK: query: create table store_sales ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_wholesale_cost_decimal decimal(38,18), + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) stored as orc tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") @@ -112,18 +113,19 @@ POSTHOOK: query: create table store_sales ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_wholesale_cost_decimal decimal(38,18), + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) stored as orc tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") @@ -144,6 +146,7 @@ ss_sold_date_sk , ss_ticket_number , ss_quantity , ss_wholesale_cost , + cast(ss_wholesale_cost as decimal(38,18)), ss_list_price , ss_sales_price , ss_ext_discount_amt , @@ -173,6 +176,7 @@ ss_sold_date_sk , ss_ticket_number , ss_quantity , ss_wholesale_cost , + cast(ss_wholesale_cost as decimal(38,18)), ss_list_price , ss_sales_price , ss_ext_discount_amt , @@ -190,27 +194,28 @@ POSTHOOK: Input: default@store_sales_txt POSTHOOK: Output: default@store_sales POSTHOOK: Lineage: store_sales.ss_addr_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_addr_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_cdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_cdemo_sk, type:int, comment:null), ] 
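
The schema diffs here switch the measure columns from float to double and add a decimal(38,18) copy of ss_wholesale_cost, which is what lets the test compare AVG over double and decimal inputs side by side. A quick illustration of the motivation: float cannot represent typical two-decimal amounts exactly, and the per-row error becomes visible in large sums, while decimal stays exact.

```java
import java.math.BigDecimal;

// Accumulate one million cents in float, double, and BigDecimal.
public class FloatVsDoubleVsDecimal {
    public static void main(String[] args) {
        float f = 0.0f;
        double d = 0.0d;
        BigDecimal dec = BigDecimal.ZERO;
        BigDecimal cent = new BigDecimal("0.01");
        for (int i = 0; i < 1_000_000; i++) {
            f += 0.01f;
            d += 0.01d;
            dec = dec.add(cent);
        }
        System.out.println(f);   // drifts far from 10000 (float rounding compounds)
        System.out.println(d);   // very close to 10000, with tiny binary error
        System.out.println(dec); // exactly 10000.00
    }
}
```
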
-POSTHOOK: Lineage: store_sales.ss_coupon_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_coupon_amt, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_coupon_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_coupon_amt, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_customer_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_customer_sk, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_discount_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_discount_amt, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_list_price, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_sales_price, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_tax, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_wholesale_cost, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_discount_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_discount_amt, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_list_price, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_sales_price, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_tax, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_wholesale_cost, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_hdemo_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_item_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_item_sk, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_list_price, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_net_paid SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_net_paid_inc_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_list_price, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid_inc_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid_inc_tax, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_profit, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_promo_sk SIMPLE 
[(store_sales_txt)store_sales_txt.FieldSchema(name:ss_promo_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_quantity SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_quantity, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sales_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sales_price, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_sold_date_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_time_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_wholesale_cost_decimal EXPRESSION [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:double, comment:null), ] PREHOOK: query: explain vectorization expression select ss_ticket_number @@ -250,10 +255,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number @@ -261,18 +266,20 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [9] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 9 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -281,7 +288,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 
Data size: 241204 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -306,14 +313,16 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -321,7 +330,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -340,19 +349,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4820 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4820 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -449,10 +458,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number @@ -460,18 +469,20 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [9] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 9 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 
Data size: 241204 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -480,7 +491,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -504,27 +515,31 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int className: VectorGroupByOperator + groupByMode: COMPLETE vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0] keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 @@ -532,7 +547,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -540,7 +555,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -558,13 +573,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -686,24 +701,26 @@ POSTHOOK: Input: default@store_sales 82 PREHOOK: query: explain 
vectorization expression select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number @@ -729,41 +746,50 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Select Operator - expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity - Select Vectorization: - className: VectorSelectOperator + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumns: [2, 9, 10] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ss_quantity) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 10) -> int - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 9, col 2 - native: false - projectedOutputColumns: [0] - keys: ss_ticket_number (type: int), ss_item_sk (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + predicateExpression: FilterLongColEqualLongScalar(col 9, val 1) -> boolean + predicate: (ss_ticket_number = 1) (type: boolean) + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_quantity (type: int), ss_wholesale_cost (type: double), ss_wholesale_cost_decimal (type: decimal(38,18)) + outputColumnNames: ss_item_sk, ss_quantity, ss_wholesale_cost, ss_wholesale_cost_decimal + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2, 10, 11, 12] + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ss_quantity), max(ss_wholesale_cost), max(ss_wholesale_cost_decimal) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int, VectorUDAFMaxDouble(col 11) -> double, VectorUDAFMaxDecimal(col 12) -> decimal(38,18) + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 2 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] + keys: ss_item_sk (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: double), _col3 (type: decimal(38,18)) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -785,48 +811,53 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), max(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFMinLong(col 2) -> int + aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxDouble(col 2) -> double, VectorUDAFMaxDecimal(col 3) -> decimal(38,18) className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true - keyExpressions: col 0, col 1 + keyExpressions: col 0 native: false - projectedOutputColumns: [0] - keys: KEY._col0 (type: int), KEY._col1 (type: int) + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(38,18)) + outputColumnNames: _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [1, 0, 2] - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumns: [0, 1, 2, 3] + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), sum(_col2) + aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint, VectorUDAFAvgLong(col 1) -> struct, 
VectorUDAFSumDouble(col 2) -> double, VectorUDAFAvgDouble(col 2) -> struct, VectorUDAFSumDecimal(col 3) -> decimal(38,18), VectorUDAFAvgDecimal(col 3) -> struct className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true - keyExpressions: col 0 + keyExpressions: ConstantVectorExpression(val 1) -> 4:long native: false - projectedOutputColumns: [0, 1] - keys: _col1 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + keys: 1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct), _col4 (type: double), _col5 (type: struct), _col6 (type: decimal(38,18)), _col7 (type: struct) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -837,24 +868,40 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), sum(VALUE._col5), avg(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 1) -> bigint, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFAvgFinal(col 3) -> double, VectorUDAFSumDouble(col 4) -> double, VectorUDAFAvgFinal(col 5) -> double, VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 7) -> decimal(38,18) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 125 Data size: 30150 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 1, 2, 3, 4, 5, 6, 7] + selectExpressions: ConstantVectorExpression(val 1) -> 8:long + Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -863,12 +910,13 @@ STAGE PLANS: ListSink PREHOOK: query: select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number @@ -876,106 +924,26 @@ PREHOOK: type: QUERY PREHOOK: Input: default@store_sales #### A masked pattern was here #### POSTHOOK: query: select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales #### A masked pattern was here #### -1 85411 816 -2 157365 812 -3 147948 710 -4 69545 411 -5 163232 840 -6 86307 627 -7 114874 563 -8 117953 662 -9 173250 690 -10 60338 602 -11 138545 657 -12 97181 586 -13 109484 555 -14 137333 442 -15 176829 652 -16 115004 654 -17 105008 460 -18 165135 738 -19 128252 831 -20 104789 374 -21 72771 469 -22 128153 449 -23 110253 603 -24 100662 1029 -25 118714 760 -26 81596 502 -27 164068 871 -28 58632 409 -29 133777 417 -30 130451 772 -31 114967 586 -32 142021 592 -33 151818 691 -34 112559 662 -35 137027 780 -36 118285 538 -37 94528 401 -38 81368 521 -39 101064 937 -40 84435 480 -41 112444 688 -42 95731 840 -43 57298 410 -44 159880 839 -45 68919 474 -46 111212 374 -47 78210 416 -48 94459 445 -49 90879 589 -50 37821 407 -51 124927 612 -52 98099 489 -53 138706 609 -54 87478 354 -55 90290 406 -56 78812 372 -57 101175 597 -58 88044 202 -59 104582 753 -60 99218 900 -61 66514 392 -62 126713 527 -63 98778 648 -64 131659 380 -65 86990 494 -66 108808 492 -67 75250 711 -68 91671 548 -69 92821 405 -70 75021 319 -71 124484 748 -72 161470 744 -73 104358 621 -74 88609 688 -75 92940 649 -76 75853 580 -77 124755 873 -78 98285 573 -79 160595 581 -80 151471 704 -81 105109 429 -82 55611 254 +1 85411 816 58.285714285714285 621.35 44.38214285714286 621.350000000000000000 44.382142857142857143 PREHOOK: 
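
The updated result row prints avg(wc) as a double (44.38214285714286) and avg(decwc) as decimal(38,18) (44.382142857142857143). A small check, using the values copied from the output above, that the two representations agree to double precision:

```java
import java.math.BigDecimal;

// Sanity check on the two avg representations from the golden output.
public class AvgRepresentationCheck {
    public static void main(String[] args) {
        double avgDouble = 44.38214285714286;
        BigDecimal avgDecimal = new BigDecimal("44.382142857142857143");
        // Rounding the exact decimal to a double should reproduce the double column.
        System.out.println(avgDecimal.doubleValue() == avgDouble); // expected: true
    }
}
```
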
query: explain vectorization expression select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -984,10 +952,10 @@ order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -1015,31 +983,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] Select Operator - expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity + expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: double), ss_wholesale_cost_decimal (type: decimal(38,18)) + outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity, ss_wholesale_cost, ss_wholesale_cost_decimal Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [2, 9, 10] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumns: [2, 9, 10, 11, 12] + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ss_quantity) + aggregations: min(ss_quantity), max(ss_wholesale_cost), max(ss_wholesale_cost_decimal) Group By Vectorization: - aggregators: VectorUDAFMinLong(col 10) -> int + aggregators: VectorUDAFMinLong(col 10) -> int, VectorUDAFMaxDouble(col 11) -> double, VectorUDAFMaxDecimal(col 12) -> decimal(38,18) className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 9, col 2 native: false - projectedOutputColumns: [0] + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1048,8 +1018,8 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - 
Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: decimal(38,18)) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1071,48 +1041,60 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), max(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFMinLong(col 2) -> int + aggregators: VectorUDAFMinLong(col 2) -> int, VectorUDAFMaxDouble(col 3) -> double, VectorUDAFMaxDecimal(col 4) -> decimal(38,18) className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false - projectedOutputColumns: [0] + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [1, 0, 2] - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumns: [1, 0, 2, 3, 4] + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) + aggregations: sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2) -> bigint + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFAvgLongComplete(col 2) -> double, VectorUDAFSumDouble(col 3) -> double, VectorUDAFAvgDoubleComplete(col 3) -> double, VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFAvgDecimalComplete(col 4) -> decimal(38,18) className: VectorGroupByOperator + groupByMode: COMPLETE vectorOutput: true keyExpressions: col 0, col 1 native: false - projectedOutputColumns: [0] + vectorProcessingMode: STREAMING + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: _col1 (type: int), _col0 (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator 
native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1124,19 +1106,19 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: decimal(38,18)), VALUE._col5 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1149,10 +1131,10 @@ STAGE PLANS: ListSink PREHOOK: query: select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -1162,10 +1144,10 @@ PREHOOK: type: QUERY PREHOOK: Input: default@store_sales #### A masked pattern was here #### POSTHOOK: query: select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, 
ss_item_sk) a @@ -1174,1003 +1156,1003 @@ order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales #### A masked pattern was here #### -1 49 5 -1 173 65 -1 1553 50 -1 3248 58 -1 3617 79 -1 4553 100 -1 4583 72 -1 4682 44 -1 5527 88 -1 5981 14 -1 10993 91 -1 13283 37 -1 13538 14 -1 13631 99 -2 1363 4 -2 2930 36 -2 3740 49 -2 6928 65 -2 7654 25 -2 9436 79 -2 10768 30 -2 12068 74 -2 12223 78 -2 13340 71 -2 13927 93 -2 14701 58 -2 15085 88 -2 15782 62 -2 17420 NULL -3 246 96 -3 1531 NULL -3 3525 42 -3 4698 98 -3 5355 53 -3 10693 27 -3 12447 82 -3 13021 64 -3 14100 79 -3 14443 4 -3 15786 56 -3 16869 4 -3 17263 17 -3 17971 88 -4 163 17 -4 1576 74 -4 5350 86 -4 5515 23 -4 6988 23 -4 7990 56 -4 8452 27 -4 9685 21 -4 11036 41 -4 12790 43 -5 1808 NULL -5 1940 60 -5 5842 50 -5 6068 76 -5 6466 36 -5 11324 52 -5 11590 15 -5 12650 66 -5 13562 64 -5 13958 60 -5 14599 83 -5 14686 91 -5 15752 66 -5 16195 50 -5 16792 71 -6 2549 62 -6 2647 100 -6 3049 31 -6 3291 100 -6 6437 72 -6 8621 NULL -6 10355 94 -6 10895 1 -6 11705 61 -6 13245 64 -6 13513 42 -7 4627 9 -7 4795 73 -7 4833 88 -7 5183 51 -7 5905 69 -7 8955 54 -7 9751 4 -7 10487 52 -7 12571 82 -7 15179 12 -7 15333 NULL -7 17255 69 -8 665 31 -8 4183 90 -8 5929 83 -8 7115 54 -8 11365 7 -8 11893 95 -8 12041 95 -8 13427 87 -8 16671 20 -8 17119 51 -8 17545 49 -9 69 11 -9 889 6 -9 1185 62 -9 4623 34 -9 7945 83 -9 8334 71 -9 12027 27 -9 12969 59 -9 13483 NULL -9 13717 53 -9 15133 15 -9 16083 32 -9 16363 54 -9 16461 66 -9 16659 84 -9 17310 33 -10 755 74 -10 1425 92 -10 1511 76 -10 3433 83 -10 3933 52 -10 4357 17 -10 5863 47 -10 9811 28 -10 13803 66 -10 15447 67 -11 157 84 -11 1315 70 -11 7519 68 -11 7608 66 -11 9901 57 -11 10699 33 -11 11490 NULL -11 11991 38 -11 12438 16 -11 15157 96 -11 15649 33 -11 17226 11 -11 17395 85 -12 373 57 -12 1591 82 -12 4888 56 -12 6148 36 -12 6248 36 -12 9616 66 -12 9788 73 -12 13399 46 -12 14746 26 -12 14944 9 -12 15440 99 -13 868 NULL -13 1760 12 -13 1898 NULL -13 2108 9 -13 2191 NULL -13 4430 73 -13 5971 80 -13 6085 58 -13 6140 15 -13 6682 80 -13 7640 48 -13 7723 27 -13 10096 12 -13 11758 34 -13 16894 87 -13 17240 20 -14 177 41 -14 769 20 -14 4507 4 -14 10175 19 -14 11549 6 -14 11653 60 -14 11817 81 -14 12587 NULL -14 13069 77 -14 13515 57 -14 13845 17 -14 16741 46 -14 16929 14 -15 4241 21 -15 4505 59 -15 4777 28 -15 7391 98 -15 8336 15 -15 8353 NULL -15 8690 32 -15 8707 21 -15 10361 39 -15 11659 80 -15 13172 25 -15 16619 81 -15 17267 7 -15 17330 82 -15 17564 26 -15 17857 38 -16 457 60 -16 1888 4 -16 4144 94 -16 6008 59 -16 7504 51 -16 8887 35 -16 9769 42 -16 9790 17 -16 9997 94 -16 11168 86 -16 11920 29 -16 16226 13 -16 17246 70 -17 2092 37 -17 4678 34 -17 6811 70 -17 9214 57 -17 10543 54 -17 11203 21 -17 13177 45 -17 13826 32 -17 15781 76 -17 17683 34 -18 2440 40 -18 5251 41 -18 7378 94 -18 8779 9 -18 8884 18 -18 9886 62 -18 11584 76 -18 11890 7 -18 12602 81 -18 12826 93 -18 12860 18 -18 14011 95 -18 14372 76 -18 14377 15 -18 17995 13 -19 1094 48 -19 3133 96 -19 3376 84 -19 4882 84 -19 6772 97 -19 7087 1 -19 7814 29 -19 8662 97 -19 9094 49 -19 9346 39 -19 10558 82 -19 10651 46 -19 11914 59 -19 16330 NULL -19 17539 20 -20 1451 89 -20 2618 4 -20 5312 9 -20 5425 15 -20 5483 8 -20 6026 21 -20 7207 90 -20 8714 NULL -20 9086 4 -20 9800 32 -20 13601 17 -20 14935 NULL -20 15131 85 -21 230 48 -21 1810 59 -21 2870 50 -21 5170 45 -21 5998 51 -21 6476 49 -21 9187 14 -21 12266 47 -21 14368 18 -21 14396 88 -22 9985 70 -22 10474 31 -22 11599 66 -22 12415 10 -22 15310 15 -22 16396 85 -22 16922 88 -22 17392 
14 -22 17660 70 -23 319 86 -23 7242 37 -23 8181 13 -23 8413 1 -23 9093 38 -23 9097 81 -23 11220 91 -23 11257 64 -23 12397 80 -23 15403 96 -23 17631 16 -24 407 53 -24 1389 72 -24 1795 21 -24 2497 85 -24 3103 73 -24 4425 57 -24 4749 28 -24 4873 41 -24 5653 92 -24 6043 1 -24 6751 82 -24 7375 97 -24 10265 93 -24 11551 48 -24 13303 97 -24 16483 89 -25 1333 55 -25 2150 100 -25 2608 76 -25 3454 100 -25 4880 29 -25 5954 34 -25 6955 40 -25 7874 65 -25 9472 48 -25 10159 24 -25 14488 26 -25 14635 68 -25 17000 40 -25 17752 55 -26 1989 26 -26 5053 4 -26 5385 97 -26 5721 81 -26 6647 64 -26 7337 45 -26 9679 18 -26 11895 77 -26 12851 56 -26 15039 34 -27 1305 44 -27 2137 96 -27 2671 92 -27 5831 61 -27 7139 59 -27 8167 28 -27 10757 15 -27 11441 15 -27 11509 65 -27 12237 89 -27 12749 31 -27 13885 66 -27 15025 26 -27 16029 59 -27 16419 65 -27 16767 60 -28 1807 98 -28 2817 8 -28 2967 29 -28 4483 78 -28 5437 15 -28 6411 3 -28 7965 93 -28 8043 58 -28 8407 14 -28 10295 13 -29 20 18 -29 1363 75 -29 2930 23 -29 3740 5 -29 7654 20 -29 9458 33 -29 10795 33 -29 12068 37 -29 12223 59 -29 13340 21 -29 13693 NULL -29 15085 40 -29 15626 NULL -29 15782 53 -30 217 91 -30 1951 59 -30 3238 16 -30 3506 15 -30 3928 87 -30 5431 77 -30 6752 69 -30 7870 7 -30 8666 21 -30 12572 33 -30 12670 20 -30 13579 75 -30 14848 62 -30 17348 62 -30 17875 78 -31 913 54 -31 4963 67 -31 6617 11 -31 6917 4 -31 7513 82 -31 11739 95 -31 14575 97 -31 14727 41 -31 15341 31 -31 15411 53 -31 16251 51 -32 1115 61 -32 2095 34 -32 2887 8 -32 4339 6 -32 4537 22 -32 4808 NULL -32 5798 87 -32 7547 24 -32 9683 26 -32 11005 46 -32 11348 41 -32 12134 21 -32 15001 57 -32 15644 34 -32 16421 74 -32 17659 51 -33 4798 27 -33 7300 3 -33 9649 36 -33 10376 21 -33 11119 92 -33 11756 26 -33 12643 89 -33 12760 54 -33 12964 80 -33 14125 66 -33 14158 82 -33 14692 93 -33 15478 22 -34 1526 91 -34 1717 53 -34 2312 6 -34 4118 88 -34 5197 63 -34 5449 9 -34 6193 61 -34 9325 3 -34 9766 83 -34 12016 42 -34 12290 53 -34 12512 60 -34 13814 20 -34 16324 30 -35 411 51 -35 2377 52 -35 3667 97 -35 4325 56 -35 5179 83 -35 11635 87 -35 11661 81 -35 14239 55 -35 15619 45 -35 15757 9 -35 17341 92 -35 17365 65 -35 17451 7 -36 1115 80 -36 2095 43 -36 2887 31 -36 7547 46 -36 11005 49 -36 11349 80 -36 15001 54 -36 15645 23 -36 16421 25 -36 17561 16 -36 17659 91 -37 2997 94 -37 7283 87 -37 10715 52 -37 10929 88 -37 13171 6 -37 15337 62 -37 16971 12 -37 17125 NULL -38 757 2 -38 2164 17 -38 3439 84 -38 4154 35 -38 5113 73 -38 6220 98 -38 7018 15 -38 7784 56 -38 8870 15 -38 9710 7 -38 10441 62 -38 15698 57 -39 386 89 -39 1598 64 -39 3476 73 -39 3943 64 -39 4190 86 -39 4957 24 -39 5393 98 -39 7097 78 -39 7118 67 -39 7604 49 -39 7697 24 -39 8078 54 -39 8411 96 -39 15491 54 -39 15625 17 -40 2854 71 -40 3490 65 -40 3985 63 -40 5098 35 -40 5318 87 -40 10094 80 -40 10912 23 -40 12050 NULL -40 13658 53 -40 16976 3 -41 10 50 -41 64 29 -41 3380 88 -41 5566 11 -41 6310 90 -41 7402 69 -41 7603 94 -41 9322 8 -41 10915 81 -41 14788 15 -41 15242 87 -41 15328 46 -41 16514 20 -42 619 69 -42 976 100 -42 1436 94 -42 2314 74 -42 2392 14 -42 2602 30 -42 3346 74 -42 3613 30 -42 6058 30 -42 6134 92 -42 8462 23 -42 9740 52 -42 10016 57 -42 10471 19 -42 12550 41 -42 15002 41 -43 2923 16 -43 3344 22 -43 3911 26 -43 4364 77 -43 4691 41 -43 5773 85 -43 5852 16 -43 11771 30 -43 14669 97 -44 2351 56 -44 2623 18 -44 7303 14 -44 7527 67 -44 9059 68 -44 11707 83 -44 12341 20 -44 13331 98 -44 13449 45 -44 14149 80 -44 15803 81 -44 16491 56 -44 16837 92 -44 16909 61 -45 811 62 -45 1479 49 -45 3265 98 -45 5309 18 -45 7363 87 -45 10115 
68 -45 11095 40 -45 13133 46 -45 16349 6 -46 1960 12 -46 3010 67 -46 7040 33 -46 8065 NULL -46 11426 72 -46 13042 58 -46 15595 32 -46 16540 30 -46 17150 57 -46 17384 13 -47 254 NULL -47 481 30 -47 1132 66 -47 1916 71 -47 3085 51 -47 3202 7 -47 3878 NULL -47 4774 11 -47 5008 82 -47 5305 NULL -47 5468 7 -47 7214 1 -47 9770 33 -47 13246 47 -47 13477 10 -48 1761 22 -48 2820 4 -48 2829 65 -48 4431 39 -48 5971 29 -48 6085 1 -48 6684 44 -48 9199 88 -48 11259 NULL -48 12468 62 -48 13153 74 -48 17799 17 -49 749 60 -49 2135 4 -49 5342 69 -49 5852 47 -49 6805 40 -49 7141 94 -49 9049 68 -49 9553 71 -49 12737 48 -49 15155 84 -49 16361 4 -50 1280 69 -50 1312 30 -50 1909 53 -50 1984 40 -50 3097 64 -50 5023 NULL -50 7135 69 -50 16081 82 -51 422 21 -51 3091 28 -51 4687 6 -51 5029 12 -51 5059 51 -51 6565 33 -51 8384 79 -51 9311 90 -51 10133 54 -51 11234 NULL -51 12625 53 -51 13199 97 -51 17483 22 -51 17705 66 -52 2420 90 -52 3334 73 -52 6098 NULL -52 7606 45 -52 11488 76 -52 15649 29 -52 16646 48 -52 17402 91 -52 17456 37 -53 1114 40 -53 2095 62 -53 2786 70 -53 2887 39 -53 7546 58 -53 11348 38 -53 13220 76 -53 13795 38 -53 15991 37 -53 16420 14 -53 16648 79 -53 17296 43 -53 17560 15 -54 702 40 -54 825 50 -54 1165 62 -54 3861 NULL -54 6517 40 -54 9159 75 -54 14737 38 -54 16059 15 -54 16974 NULL -54 17479 34 -55 1339 16 -55 3001 7 -55 5137 33 -55 9703 44 -55 12170 92 -55 12205 90 -55 14135 36 -55 14923 71 -55 17677 17 -56 4242 2 -56 4506 57 -56 8353 35 -56 8691 59 -56 8707 68 -56 10362 54 -56 16620 23 -56 17331 74 -57 3253 71 -57 4028 88 -57 4933 22 -57 12596 91 -57 12721 62 -57 12740 52 -57 15182 86 -57 17729 26 -57 17993 99 -58 1829 52 -58 3848 6 -58 5117 2 -58 7649 19 -58 9743 62 -58 10802 14 -58 15635 6 -58 16472 6 -58 16949 35 -59 3133 92 -59 3546 22 -59 5772 70 -59 7087 80 -59 8010 46 -59 8335 36 -59 9348 62 -59 9397 92 -59 10651 100 -59 11916 19 -59 12858 90 -59 14529 44 -60 97 50 -60 555 62 -60 633 71 -60 999 43 -60 1117 78 -60 1573 90 -60 4041 25 -60 4235 28 -60 4513 72 -60 4937 22 -60 7231 95 -60 10277 62 -60 10393 75 -60 13975 14 -60 16887 25 -60 17755 88 -61 1106 4 -61 2264 36 -61 3362 48 -61 4567 26 -61 5528 78 -61 6380 77 -61 7591 78 -61 8924 11 -61 10330 8 -61 16462 26 -62 4093 94 -62 6403 NULL -62 8457 37 -62 10149 75 -62 12163 29 -62 12199 5 -62 12407 NULL -62 13559 80 -62 15399 74 -62 15733 40 -62 16151 93 -63 4488 73 -63 5079 79 -63 5217 66 -63 5658 99 -63 9319 80 -63 11370 38 -63 11946 85 -63 13339 19 -63 15793 40 -63 16569 69 -64 1213 NULL -64 3090 87 -64 3963 NULL -64 11835 82 -64 13224 NULL -64 14407 8 -64 15867 59 -64 15936 30 -64 16921 19 -64 17586 78 -64 17617 17 -65 2287 100 -65 4227 42 -65 9625 51 -65 9847 54 -65 13897 40 -65 14905 85 -65 15177 55 -65 17025 67 -66 6507 76 -66 7033 65 -66 7227 66 -66 8197 41 -66 9237 29 -66 10019 10 -66 11419 66 -66 15629 20 -66 16745 91 -66 16795 28 -67 757 77 -67 2133 74 -67 3439 73 -67 4155 87 -67 5113 NULL -67 7020 79 -67 7507 77 -67 8469 59 -67 8871 71 -67 12087 70 -67 15699 44 -68 1387 74 -68 1603 57 -68 1820 54 -68 2035 22 -68 2296 52 -68 2564 83 -68 5162 23 -68 6763 77 -68 7765 NULL -68 12526 3 -68 12724 88 -68 17426 2 -68 17600 13 -69 322 45 -69 337 34 -69 4208 9 -69 4267 10 -69 6136 7 -69 7264 67 -69 7822 30 -69 8599 53 -69 11137 68 -69 13489 66 -69 13792 NULL -69 15448 16 -70 1592 53 -70 2462 NULL -70 3296 48 -70 3947 NULL -70 6185 82 -70 6425 NULL -70 8893 17 -70 9857 20 -70 14549 4 -70 17815 95 -71 457 75 -71 1888 4 -71 2098 51 -71 4144 49 -71 5858 NULL -71 6008 54 -71 7504 3 -71 8887 10 -71 9274 36 -71 9769 79 -71 9790 96 -71 9997 26 
-71 10108 66 -71 10288 30 -71 11168 79 -71 17246 90 -72 1535 9 -72 5917 85 -72 6113 45 -72 6671 13 -72 9860 26 -72 10427 66 -72 10753 16 -72 11741 62 -72 12788 29 -72 12901 57 -72 13085 94 -72 13423 62 -72 13904 37 -72 15587 87 -72 16765 56 -73 247 53 -73 1063 37 -73 3205 82 -73 4946 54 -73 6862 58 -73 10051 49 -73 12502 75 -73 15109 38 -73 16519 97 -73 16585 38 -73 17269 40 -74 326 29 -74 3104 78 -74 3175 23 -74 3278 NULL -74 3542 96 -74 3754 26 -74 5492 54 -74 7694 17 -74 8653 12 -74 9620 95 -74 10069 99 -74 13208 87 -74 16694 72 -75 607 20 -75 2948 25 -75 4625 73 -75 6938 89 -75 6953 71 -75 8726 6 -75 9905 54 -75 10217 85 -75 11039 70 -75 14186 63 -75 16796 93 -76 257 5 -76 465 2 -76 1107 16 -76 1503 97 -76 2265 98 -76 2869 32 -76 3363 25 -76 4237 48 -76 4567 40 -76 5529 78 -76 6381 50 -76 7591 27 -76 8925 6 -76 10331 3 -76 16463 53 -77 992 62 -77 1399 34 -77 2713 85 -77 3868 89 -77 6289 30 -77 7339 88 -77 7448 95 -77 7486 49 -77 8686 38 -77 9220 90 -77 11918 36 -77 12439 95 -77 13456 48 -77 14815 18 -77 16687 16 -78 901 3 -78 3304 50 -78 3856 27 -78 5965 78 -78 6044 59 -78 6110 43 -78 6500 76 -78 7576 87 -78 8611 79 -78 10507 6 -78 11209 7 -78 12706 19 -78 14996 39 -79 247 NULL -79 1063 85 -79 3205 48 -79 4947 35 -79 6864 1 -79 10051 10 -79 10524 36 -79 12504 81 -79 14322 41 -79 15109 NULL -79 15498 3 -79 15888 58 -79 16519 9 -79 16585 93 -79 17269 81 -80 998 93 -80 1519 25 -80 1573 40 -80 4040 66 -80 4513 NULL -80 4622 1 -80 7231 49 -80 7610 37 -80 10393 5 -80 12968 NULL -80 13717 91 -80 13975 13 -80 16363 84 -80 16886 77 -80 17308 29 -80 17755 94 -81 4486 31 -81 5078 75 -81 5216 64 -81 5656 24 -81 7166 7 -81 7663 79 -81 8918 37 -81 9319 36 -81 11107 36 -81 11368 26 -81 13339 6 -81 15793 8 -82 2572 53 -82 7862 75 -82 13138 59 -82 14998 49 -82 17041 18 +1 49 5 5.0 10.68 10.68 10.680000000000000000 10.680000000000000000 +1 173 65 65.0 27.16 27.16 27.160000000000000000 27.160000000000000000 +1 1553 50 50.0 67.71 67.71 67.710000000000000000 67.710000000000000000 +1 3248 58 58.0 4.57 4.57 4.570000000000000000 4.570000000000000000 +1 3617 79 79.0 11.41 11.41 11.410000000000000000 11.410000000000000000 +1 4553 100 100.0 25.08 25.08 25.080000000000000000 25.080000000000000000 +1 4583 72 72.0 84.72 84.72 84.720000000000000000 84.720000000000000000 +1 4682 44 44.0 31.07 31.07 31.070000000000000000 31.070000000000000000 +1 5527 88 88.0 52.41 52.41 52.410000000000000000 52.410000000000000000 +1 5981 14 14.0 57.37 57.37 57.370000000000000000 57.370000000000000000 +1 10993 91 91.0 93.48 93.48 93.480000000000000000 93.480000000000000000 +1 13283 37 37.0 63.63 63.63 63.630000000000000000 63.630000000000000000 +1 13538 14 14.0 11.54 11.54 11.540000000000000000 11.540000000000000000 +1 13631 99 99.0 80.52 80.52 80.520000000000000000 80.520000000000000000 +2 1363 4 4.0 13.46 13.46 13.460000000000000000 13.460000000000000000 +2 2930 36 36.0 61.23 61.23 61.230000000000000000 61.230000000000000000 +2 3740 49 49.0 6.55 6.55 6.550000000000000000 6.550000000000000000 +2 6928 65 65.0 93.86 93.86 93.860000000000000000 93.860000000000000000 +2 7654 25 25.0 74.26 74.26 74.260000000000000000 74.260000000000000000 +2 9436 79 79.0 88.02 88.02 88.020000000000000000 88.020000000000000000 +2 10768 30 30.0 2.27 2.27 2.270000000000000000 2.270000000000000000 +2 12068 74 74.0 16.55 16.55 16.550000000000000000 16.550000000000000000 +2 12223 78 78.0 65.71 65.71 65.710000000000000000 65.710000000000000000 +2 13340 71 71.0 36.01 36.01 36.010000000000000000 36.010000000000000000 +2 13927 93 93.0 35.87 35.87 
35.870000000000000000 35.870000000000000000 +2 14701 58 58.0 53.09 53.09 53.090000000000000000 53.090000000000000000 +2 15085 88 88.0 64.43 64.43 64.430000000000000000 64.430000000000000000 +2 15782 62 62.0 77.97 77.97 77.970000000000000000 77.970000000000000000 +2 17420 NULL NULL 17.12 17.12 17.120000000000000000 17.120000000000000000 +3 246 96 96.0 98.02 98.02 98.020000000000000000 98.020000000000000000 +3 1531 NULL NULL NULL NULL NULL NULL +3 3525 42 42.0 97.03 97.03 97.030000000000000000 97.030000000000000000 +3 4698 98 98.0 85.0 85.0 85.000000000000000000 85.000000000000000000 +3 5355 53 53.0 23.04 23.04 23.040000000000000000 23.040000000000000000 +3 10693 27 27.0 37.04 37.04 37.040000000000000000 37.040000000000000000 +3 12447 82 82.0 56.14 56.14 56.140000000000000000 56.140000000000000000 +3 13021 64 64.0 74.69 74.69 74.690000000000000000 74.690000000000000000 +3 14100 79 79.0 44.66 44.66 44.660000000000000000 44.660000000000000000 +3 14443 4 4.0 95.75 95.75 95.750000000000000000 95.750000000000000000 +3 15786 56 56.0 4.31 4.31 4.310000000000000000 4.310000000000000000 +3 16869 4 4.0 75.67 75.67 75.670000000000000000 75.670000000000000000 +3 17263 17 17.0 72.38 72.38 72.380000000000000000 72.380000000000000000 +3 17971 88 88.0 27.95 27.95 27.950000000000000000 27.950000000000000000 +4 163 17 17.0 54.26 54.26 54.260000000000000000 54.260000000000000000 +4 1576 74 74.0 81.81 81.81 81.810000000000000000 81.810000000000000000 +4 5350 86 86.0 64.67 64.67 64.670000000000000000 64.670000000000000000 +4 5515 23 23.0 2.91 2.91 2.910000000000000000 2.910000000000000000 +4 6988 23 23.0 53.28 53.28 53.280000000000000000 53.280000000000000000 +4 7990 56 56.0 64.68 64.68 64.680000000000000000 64.680000000000000000 +4 8452 27 27.0 26.21 26.21 26.210000000000000000 26.210000000000000000 +4 9685 21 21.0 40.39 40.39 40.390000000000000000 40.390000000000000000 +4 11036 41 41.0 67.18 67.18 67.180000000000000000 67.180000000000000000 +4 12790 43 43.0 54.34 54.34 54.340000000000000000 54.340000000000000000 +5 1808 NULL NULL NULL NULL NULL NULL +5 1940 60 60.0 69.54 69.54 69.540000000000000000 69.540000000000000000 +5 5842 50 50.0 30.69 30.69 30.690000000000000000 30.690000000000000000 +5 6068 76 76.0 89.78 89.78 89.780000000000000000 89.780000000000000000 +5 6466 36 36.0 7.93 7.93 7.930000000000000000 7.930000000000000000 +5 11324 52 52.0 16.33 16.33 16.330000000000000000 16.330000000000000000 +5 11590 15 15.0 21.21 21.21 21.210000000000000000 21.210000000000000000 +5 12650 66 66.0 21.01 21.01 21.010000000000000000 21.010000000000000000 +5 13562 64 64.0 87.9 87.9 87.900000000000000000 87.900000000000000000 +5 13958 60 60.0 41.72 41.72 41.720000000000000000 41.720000000000000000 +5 14599 83 83.0 74.15 74.15 74.150000000000000000 74.150000000000000000 +5 14686 91 91.0 27.68 27.68 27.680000000000000000 27.680000000000000000 +5 15752 66 66.0 71.06 71.06 71.060000000000000000 71.060000000000000000 +5 16195 50 50.0 30.96 30.96 30.960000000000000000 30.960000000000000000 +5 16792 71 71.0 22.1 22.1 22.100000000000000000 22.100000000000000000 +6 2549 62 62.0 85.07 85.07 85.070000000000000000 85.070000000000000000 +6 2647 100 100.0 4.45 4.45 4.450000000000000000 4.450000000000000000 +6 3049 31 31.0 49.78 49.78 49.780000000000000000 49.780000000000000000 +6 3291 100 100.0 41.08 41.08 41.080000000000000000 41.080000000000000000 +6 6437 72 72.0 55.49 55.49 55.490000000000000000 55.490000000000000000 +6 8621 NULL NULL NULL NULL NULL NULL +6 10355 94 94.0 62.67 62.67 62.670000000000000000 62.670000000000000000 +6 10895 
1 1.0 71.1 71.1 71.100000000000000000 71.100000000000000000 +6 11705 61 61.0 48.18 48.18 48.180000000000000000 48.180000000000000000 +6 13245 64 64.0 86.35 86.35 86.350000000000000000 86.350000000000000000 +6 13513 42 42.0 64.46 64.46 64.460000000000000000 64.460000000000000000 +7 4627 9 9.0 56.13 56.13 56.130000000000000000 56.130000000000000000 +7 4795 73 73.0 12.17 12.17 12.170000000000000000 12.170000000000000000 +7 4833 88 88.0 38.23 38.23 38.230000000000000000 38.230000000000000000 +7 5183 51 51.0 84.65 84.65 84.650000000000000000 84.650000000000000000 +7 5905 69 69.0 99.85 99.85 99.850000000000000000 99.850000000000000000 +7 8955 54 54.0 42.82 42.82 42.820000000000000000 42.820000000000000000 +7 9751 4 4.0 NULL NULL NULL NULL +7 10487 52 52.0 63.8 63.8 63.800000000000000000 63.800000000000000000 +7 12571 82 82.0 69.53 69.53 69.530000000000000000 69.530000000000000000 +7 15179 12 12.0 47.6 47.6 47.600000000000000000 47.600000000000000000 +7 15333 NULL NULL NULL NULL NULL NULL +7 17255 69 69.0 34.19 34.19 34.190000000000000000 34.190000000000000000 +8 665 31 31.0 15.64 15.64 15.640000000000000000 15.640000000000000000 +8 4183 90 90.0 81.63 81.63 81.630000000000000000 81.630000000000000000 +8 5929 83 83.0 14.11 14.11 14.110000000000000000 14.110000000000000000 +8 7115 54 54.0 36.99 36.99 36.990000000000000000 36.990000000000000000 +8 11365 7 7.0 18.65 18.65 18.650000000000000000 18.650000000000000000 +8 11893 95 95.0 21.29 21.29 21.290000000000000000 21.290000000000000000 +8 12041 95 95.0 91.8 91.8 91.800000000000000000 91.800000000000000000 +8 13427 87 87.0 31.78 31.78 31.780000000000000000 31.780000000000000000 +8 16671 20 20.0 18.95 18.95 18.950000000000000000 18.950000000000000000 +8 17119 51 51.0 8.04 8.04 8.040000000000000000 8.040000000000000000 +8 17545 49 49.0 72.15 72.15 72.150000000000000000 72.150000000000000000 +9 69 11 11.0 31.7 31.7 31.700000000000000000 31.700000000000000000 +9 889 6 6.0 27.17 27.17 27.170000000000000000 27.170000000000000000 +9 1185 62 62.0 55.68 55.68 55.680000000000000000 55.680000000000000000 +9 4623 34 34.0 2.97 2.97 2.970000000000000000 2.970000000000000000 +9 7945 83 83.0 8.1 8.1 8.100000000000000000 8.100000000000000000 +9 8334 71 71.0 34.79 34.79 34.790000000000000000 34.790000000000000000 +9 12027 27 27.0 98.68 98.68 98.680000000000000000 98.680000000000000000 +9 12969 59 59.0 88.31 88.31 88.310000000000000000 88.310000000000000000 +9 13483 NULL NULL 59.14 59.14 59.140000000000000000 59.140000000000000000 +9 13717 53 53.0 75.37 75.37 75.370000000000000000 75.370000000000000000 +9 15133 15 15.0 35.89 35.89 35.890000000000000000 35.890000000000000000 +9 16083 32 32.0 99.1 99.1 99.100000000000000000 99.100000000000000000 +9 16363 54 54.0 NULL NULL NULL NULL +9 16461 66 66.0 15.21 15.21 15.210000000000000000 15.210000000000000000 +9 16659 84 84.0 76.71 76.71 76.710000000000000000 76.710000000000000000 +9 17310 33 33.0 27.13 27.13 27.130000000000000000 27.130000000000000000 +10 755 74 74.0 82.24 82.24 82.240000000000000000 82.240000000000000000 +10 1425 92 92.0 NULL NULL NULL NULL +10 1511 76 76.0 31.47 31.47 31.470000000000000000 31.470000000000000000 +10 3433 83 83.0 10.26 10.26 10.260000000000000000 10.260000000000000000 +10 3933 52 52.0 52.19 52.19 52.190000000000000000 52.190000000000000000 +10 4357 17 17.0 88.36 88.36 88.360000000000000000 88.360000000000000000 +10 5863 47 47.0 11.71 11.71 11.710000000000000000 11.710000000000000000 +10 9811 28 28.0 47.85 47.85 47.850000000000000000 47.850000000000000000 +10 13803 66 66.0 82.35 82.35 
82.350000000000000000 82.350000000000000000 +10 15447 67 67.0 33.28 33.28 33.280000000000000000 33.280000000000000000 +11 157 84 84.0 64.63 64.63 64.630000000000000000 64.630000000000000000 +11 1315 70 70.0 45.84 45.84 45.840000000000000000 45.840000000000000000 +11 7519 68 68.0 7.16 7.16 7.160000000000000000 7.160000000000000000 +11 7608 66 66.0 8.34 8.34 8.340000000000000000 8.340000000000000000 +11 9901 57 57.0 46.93 46.93 46.930000000000000000 46.930000000000000000 +11 10699 33 33.0 73.77 73.77 73.770000000000000000 73.770000000000000000 +11 11490 NULL NULL NULL NULL NULL NULL +11 11991 38 38.0 3.27 3.27 3.270000000000000000 3.270000000000000000 +11 12438 16 16.0 92.94 92.94 92.940000000000000000 92.940000000000000000 +11 15157 96 96.0 15.52 15.52 15.520000000000000000 15.520000000000000000 +11 15649 33 33.0 66.11 66.11 66.110000000000000000 66.110000000000000000 +11 17226 11 11.0 34.03 34.03 34.030000000000000000 34.030000000000000000 +11 17395 85 85.0 38.04 38.04 38.040000000000000000 38.040000000000000000 +12 373 57 57.0 13.95 13.95 13.950000000000000000 13.950000000000000000 +12 1591 82 82.0 45.84 45.84 45.840000000000000000 45.840000000000000000 +12 4888 56 56.0 75.74 75.74 75.740000000000000000 75.740000000000000000 +12 6148 36 36.0 97.62 97.62 97.620000000000000000 97.620000000000000000 +12 6248 36 36.0 75.17 75.17 75.170000000000000000 75.170000000000000000 +12 9616 66 66.0 99.06 99.06 99.060000000000000000 99.060000000000000000 +12 9788 73 73.0 79.42 79.42 79.420000000000000000 79.420000000000000000 +12 13399 46 46.0 45.27 45.27 45.270000000000000000 45.270000000000000000 +12 14746 26 26.0 58.74 58.74 58.740000000000000000 58.740000000000000000 +12 14944 9 9.0 7.33 7.33 7.330000000000000000 7.330000000000000000 +12 15440 99 99.0 27.09 27.09 27.090000000000000000 27.090000000000000000 +13 868 NULL NULL 62.85 62.85 62.850000000000000000 62.850000000000000000 +13 1760 12 12.0 80.96 80.96 80.960000000000000000 80.960000000000000000 +13 1898 NULL NULL 96.46 96.46 96.460000000000000000 96.460000000000000000 +13 2108 9 9.0 NULL NULL NULL NULL +13 2191 NULL NULL NULL NULL NULL NULL +13 4430 73 73.0 5.86 5.86 5.860000000000000000 5.860000000000000000 +13 5971 80 80.0 72.61 72.61 72.610000000000000000 72.610000000000000000 +13 6085 58 58.0 21.45 21.45 21.450000000000000000 21.450000000000000000 +13 6140 15 15.0 89.9 89.9 89.900000000000000000 89.900000000000000000 +13 6682 80 80.0 32.05 32.05 32.050000000000000000 32.050000000000000000 +13 7640 48 48.0 17.06 17.06 17.060000000000000000 17.060000000000000000 +13 7723 27 27.0 59.09 59.09 59.090000000000000000 59.090000000000000000 +13 10096 12 12.0 17.14 17.14 17.140000000000000000 17.140000000000000000 +13 11758 34 34.0 72.24 72.24 72.240000000000000000 72.240000000000000000 +13 16894 87 87.0 20.99 20.99 20.990000000000000000 20.990000000000000000 +13 17240 20 20.0 93.85 93.85 93.850000000000000000 93.850000000000000000 +14 177 41 41.0 13.05 13.05 13.050000000000000000 13.050000000000000000 +14 769 20 20.0 26.29 26.29 26.290000000000000000 26.290000000000000000 +14 4507 4 4.0 45.45 45.45 45.450000000000000000 45.450000000000000000 +14 10175 19 19.0 39.97 39.97 39.970000000000000000 39.970000000000000000 +14 11549 6 6.0 19.33 19.33 19.330000000000000000 19.330000000000000000 +14 11653 60 60.0 86.94 86.94 86.940000000000000000 86.940000000000000000 +14 11817 81 81.0 60.77 60.77 60.770000000000000000 60.770000000000000000 +14 12587 NULL NULL NULL NULL NULL NULL +14 13069 77 77.0 93.6 93.6 93.600000000000000000 93.600000000000000000 +14 
13515 57 57.0 87.32 87.32 87.320000000000000000 87.320000000000000000 +14 13845 17 17.0 52.3 52.3 52.300000000000000000 52.300000000000000000 +14 16741 46 46.0 76.43 76.43 76.430000000000000000 76.430000000000000000 +14 16929 14 14.0 54.76 54.76 54.760000000000000000 54.760000000000000000 +15 4241 21 21.0 89.07 89.07 89.070000000000000000 89.070000000000000000 +15 4505 59 59.0 77.35 77.35 77.350000000000000000 77.350000000000000000 +15 4777 28 28.0 36.86 36.86 36.860000000000000000 36.860000000000000000 +15 7391 98 98.0 53.76 53.76 53.760000000000000000 53.760000000000000000 +15 8336 15 15.0 44.09 44.09 44.090000000000000000 44.090000000000000000 +15 8353 NULL NULL NULL NULL NULL NULL +15 8690 32 32.0 67.37 67.37 67.370000000000000000 67.370000000000000000 +15 8707 21 21.0 48.54 48.54 48.540000000000000000 48.540000000000000000 +15 10361 39 39.0 74.88 74.88 74.880000000000000000 74.880000000000000000 +15 11659 80 80.0 86.23 86.23 86.230000000000000000 86.230000000000000000 +15 13172 25 25.0 47.11 47.11 47.110000000000000000 47.110000000000000000 +15 16619 81 81.0 80.21 80.21 80.210000000000000000 80.210000000000000000 +15 17267 7 7.0 30.61 30.61 30.610000000000000000 30.610000000000000000 +15 17330 82 82.0 67.45 67.45 67.450000000000000000 67.450000000000000000 +15 17564 26 26.0 63.52 63.52 63.520000000000000000 63.520000000000000000 +15 17857 38 38.0 96.35 96.35 96.350000000000000000 96.350000000000000000 +16 457 60 60.0 91.53 91.53 91.530000000000000000 91.530000000000000000 +16 1888 4 4.0 47.64 47.64 47.640000000000000000 47.640000000000000000 +16 4144 94 94.0 19.91 19.91 19.910000000000000000 19.910000000000000000 +16 6008 59 59.0 59.62 59.62 59.620000000000000000 59.620000000000000000 +16 7504 51 51.0 31.35 31.35 31.350000000000000000 31.350000000000000000 +16 8887 35 35.0 59.82 59.82 59.820000000000000000 59.820000000000000000 +16 9769 42 42.0 29.53 29.53 29.530000000000000000 29.530000000000000000 +16 9790 17 17.0 36.95 36.95 36.950000000000000000 36.950000000000000000 +16 9997 94 94.0 64.76 64.76 64.760000000000000000 64.760000000000000000 +16 11168 86 86.0 62.85 62.85 62.850000000000000000 62.850000000000000000 +16 11920 29 29.0 94.31 94.31 94.310000000000000000 94.310000000000000000 +16 16226 13 13.0 31.3 31.3 31.300000000000000000 31.300000000000000000 +16 17246 70 70.0 80.85 80.85 80.850000000000000000 80.850000000000000000 +17 2092 37 37.0 31.71 31.71 31.710000000000000000 31.710000000000000000 +17 4678 34 34.0 32.47 32.47 32.470000000000000000 32.470000000000000000 +17 6811 70 70.0 62.96 62.96 62.960000000000000000 62.960000000000000000 +17 9214 57 57.0 14.2 14.2 14.200000000000000000 14.200000000000000000 +17 10543 54 54.0 57.11 57.11 57.110000000000000000 57.110000000000000000 +17 11203 21 21.0 93.44 93.44 93.440000000000000000 93.440000000000000000 +17 13177 45 45.0 44.18 44.18 44.180000000000000000 44.180000000000000000 +17 13826 32 32.0 58.61 58.61 58.610000000000000000 58.610000000000000000 +17 15781 76 76.0 24.79 24.79 24.790000000000000000 24.790000000000000000 +17 17683 34 34.0 81.48 81.48 81.480000000000000000 81.480000000000000000 +18 2440 40 40.0 15.39 15.39 15.390000000000000000 15.390000000000000000 +18 5251 41 41.0 45.83 45.83 45.830000000000000000 45.830000000000000000 +18 7378 94 94.0 61.01 61.01 61.010000000000000000 61.010000000000000000 +18 8779 9 9.0 75.19 75.19 75.190000000000000000 75.190000000000000000 +18 8884 18 18.0 43.49 43.49 43.490000000000000000 43.490000000000000000 +18 9886 62 62.0 9.59 9.59 9.590000000000000000 9.590000000000000000 +18 11584 
76 76.0 4.26 4.26 4.260000000000000000 4.260000000000000000 +18 11890 7 7.0 82.36 82.36 82.360000000000000000 82.360000000000000000 +18 12602 81 81.0 11.32 11.32 11.320000000000000000 11.320000000000000000 +18 12826 93 93.0 82.82 82.82 82.820000000000000000 82.820000000000000000 +18 12860 18 18.0 19.89 19.89 19.890000000000000000 19.890000000000000000 +18 14011 95 95.0 55.01 55.01 55.010000000000000000 55.010000000000000000 +18 14372 76 76.0 89.58 89.58 89.580000000000000000 89.580000000000000000 +18 14377 15 15.0 15.47 15.47 15.470000000000000000 15.470000000000000000 +18 17995 13 13.0 46.79 46.79 46.790000000000000000 46.790000000000000000 +19 1094 48 48.0 19.55 19.55 19.550000000000000000 19.550000000000000000 +19 3133 96 96.0 68.89 68.89 68.890000000000000000 68.890000000000000000 +19 3376 84 84.0 63.07 63.07 63.070000000000000000 63.070000000000000000 +19 4882 84 84.0 41.48 41.48 41.480000000000000000 41.480000000000000000 +19 6772 97 97.0 36.04 36.04 36.040000000000000000 36.040000000000000000 +19 7087 1 1.0 48.67 48.67 48.670000000000000000 48.670000000000000000 +19 7814 29 29.0 61.78 61.78 61.780000000000000000 61.780000000000000000 +19 8662 97 97.0 72.78 72.78 72.780000000000000000 72.780000000000000000 +19 9094 49 49.0 61.82 61.82 61.820000000000000000 61.820000000000000000 +19 9346 39 39.0 84.06 84.06 84.060000000000000000 84.060000000000000000 +19 10558 82 82.0 12.34 12.34 12.340000000000000000 12.340000000000000000 +19 10651 46 46.0 57.69 57.69 57.690000000000000000 57.690000000000000000 +19 11914 59 59.0 88.03 88.03 88.030000000000000000 88.030000000000000000 +19 16330 NULL NULL 79.15 79.15 79.150000000000000000 79.150000000000000000 +19 17539 20 20.0 69.2 69.2 69.200000000000000000 69.200000000000000000 +20 1451 89 89.0 84.34 84.34 84.340000000000000000 84.340000000000000000 +20 2618 4 4.0 69.47 69.47 69.470000000000000000 69.470000000000000000 +20 5312 9 9.0 29.45 29.45 29.450000000000000000 29.450000000000000000 +20 5425 15 15.0 28.19 28.19 28.190000000000000000 28.190000000000000000 +20 5483 8 8.0 30.74 30.74 30.740000000000000000 30.740000000000000000 +20 6026 21 21.0 80.56 80.56 80.560000000000000000 80.560000000000000000 +20 7207 90 90.0 83.12 83.12 83.120000000000000000 83.120000000000000000 +20 8714 NULL NULL 8.15 8.15 8.150000000000000000 8.150000000000000000 +20 9086 4 4.0 98.99 98.99 98.990000000000000000 98.990000000000000000 +20 9800 32 32.0 18.09 18.09 18.090000000000000000 18.090000000000000000 +20 13601 17 17.0 1.4 1.4 1.400000000000000000 1.400000000000000000 +20 14935 NULL NULL NULL NULL NULL NULL +20 15131 85 85.0 42.56 42.56 42.560000000000000000 42.560000000000000000 +21 230 48 48.0 13.37 13.37 13.370000000000000000 13.370000000000000000 +21 1810 59 59.0 66.37 66.37 66.370000000000000000 66.370000000000000000 +21 2870 50 50.0 91.94 91.94 91.940000000000000000 91.940000000000000000 +21 5170 45 45.0 90.0 90.0 90.000000000000000000 90.000000000000000000 +21 5998 51 51.0 9.41 9.41 9.410000000000000000 9.410000000000000000 +21 6476 49 49.0 20.29 20.29 20.290000000000000000 20.290000000000000000 +21 9187 14 14.0 35.49 35.49 35.490000000000000000 35.490000000000000000 +21 12266 47 47.0 11.55 11.55 11.550000000000000000 11.550000000000000000 +21 14368 18 18.0 51.29 51.29 51.290000000000000000 51.290000000000000000 +21 14396 88 88.0 45.26 45.26 45.260000000000000000 45.260000000000000000 +22 9985 70 70.0 21.46 21.46 21.460000000000000000 21.460000000000000000 +22 10474 31 31.0 45.65 45.65 45.650000000000000000 45.650000000000000000 +22 11599 66 66.0 5.01 5.01 
5.010000000000000000 5.010000000000000000 +22 12415 10 10.0 38.97 38.97 38.970000000000000000 38.970000000000000000 +22 15310 15 15.0 82.24 82.24 82.240000000000000000 82.240000000000000000 +22 16396 85 85.0 86.46 86.46 86.460000000000000000 86.460000000000000000 +22 16922 88 88.0 28.0 28.0 28.000000000000000000 28.000000000000000000 +22 17392 14 14.0 51.86 51.86 51.860000000000000000 51.860000000000000000 +22 17660 70 70.0 95.56 95.56 95.560000000000000000 95.560000000000000000 +23 319 86 86.0 66.36 66.36 66.360000000000000000 66.360000000000000000 +23 7242 37 37.0 54.82 54.82 54.820000000000000000 54.820000000000000000 +23 8181 13 13.0 4.63 4.63 4.630000000000000000 4.630000000000000000 +23 8413 1 1.0 14.2 14.2 14.200000000000000000 14.200000000000000000 +23 9093 38 38.0 80.2 80.2 80.200000000000000000 80.200000000000000000 +23 9097 81 81.0 72.51 72.51 72.510000000000000000 72.510000000000000000 +23 11220 91 91.0 71.3 71.3 71.300000000000000000 71.300000000000000000 +23 11257 64 64.0 29.95 29.95 29.950000000000000000 29.950000000000000000 +23 12397 80 80.0 78.73 78.73 78.730000000000000000 78.730000000000000000 +23 15403 96 96.0 51.96 51.96 51.960000000000000000 51.960000000000000000 +23 17631 16 16.0 22.06 22.06 22.060000000000000000 22.060000000000000000 +24 407 53 53.0 98.05 98.05 98.050000000000000000 98.050000000000000000 +24 1389 72 72.0 60.01 60.01 60.010000000000000000 60.010000000000000000 +24 1795 21 21.0 76.67 76.67 76.670000000000000000 76.670000000000000000 +24 2497 85 85.0 57.93 57.93 57.930000000000000000 57.930000000000000000 +24 3103 73 73.0 44.96 44.96 44.960000000000000000 44.960000000000000000 +24 4425 57 57.0 29.31 29.31 29.310000000000000000 29.310000000000000000 +24 4749 28 28.0 18.17 18.17 18.170000000000000000 18.170000000000000000 +24 4873 41 41.0 40.34 40.34 40.340000000000000000 40.340000000000000000 +24 5653 92 92.0 64.99 64.99 64.990000000000000000 64.990000000000000000 +24 6043 1 1.0 33.41 33.41 33.410000000000000000 33.410000000000000000 +24 6751 82 82.0 7.48 7.48 7.480000000000000000 7.480000000000000000 +24 7375 97 97.0 78.55 78.55 78.550000000000000000 78.550000000000000000 +24 10265 93 93.0 12.03 12.03 12.030000000000000000 12.030000000000000000 +24 11551 48 48.0 30.8 30.8 30.800000000000000000 30.800000000000000000 +24 13303 97 97.0 94.48 94.48 94.480000000000000000 94.480000000000000000 +24 16483 89 89.0 13.84 13.84 13.840000000000000000 13.840000000000000000 +25 1333 55 55.0 30.82 30.82 30.820000000000000000 30.820000000000000000 +25 2150 100 100.0 67.24 67.24 67.240000000000000000 67.240000000000000000 +25 2608 76 76.0 87.75 87.75 87.750000000000000000 87.750000000000000000 +25 3454 100 100.0 1.61 1.61 1.610000000000000000 1.610000000000000000 +25 4880 29 29.0 15.35 15.35 15.350000000000000000 15.350000000000000000 +25 5954 34 34.0 76.57 76.57 76.570000000000000000 76.570000000000000000 +25 6955 40 40.0 87.12 87.12 87.120000000000000000 87.120000000000000000 +25 7874 65 65.0 2.75 2.75 2.750000000000000000 2.750000000000000000 +25 9472 48 48.0 4.97 4.97 4.970000000000000000 4.970000000000000000 +25 10159 24 24.0 76.64 76.64 76.640000000000000000 76.640000000000000000 +25 14488 26 26.0 68.17 68.17 68.170000000000000000 68.170000000000000000 +25 14635 68 68.0 45.79 45.79 45.790000000000000000 45.790000000000000000 +25 17000 40 40.0 89.34 89.34 89.340000000000000000 89.340000000000000000 +25 17752 55 55.0 11.49 11.49 11.490000000000000000 11.490000000000000000 +26 1989 26 26.0 83.31 83.31 83.310000000000000000 83.310000000000000000 +26 5053 4 4.0 19.63 
19.63 19.630000000000000000 19.630000000000000000 +26 5385 97 97.0 51.89 51.89 51.890000000000000000 51.890000000000000000 +26 5721 81 81.0 74.96 74.96 74.960000000000000000 74.960000000000000000 +26 6647 64 64.0 57.04 57.04 57.040000000000000000 57.040000000000000000 +26 7337 45 45.0 37.59 37.59 37.590000000000000000 37.590000000000000000 +26 9679 18 18.0 77.54 77.54 77.540000000000000000 77.540000000000000000 +26 11895 77 77.0 36.85 36.85 36.850000000000000000 36.850000000000000000 +26 12851 56 56.0 14.02 14.02 14.020000000000000000 14.020000000000000000 +26 15039 34 34.0 22.65 22.65 22.650000000000000000 22.650000000000000000 +27 1305 44 44.0 8.35 8.35 8.350000000000000000 8.350000000000000000 +27 2137 96 96.0 3.07 3.07 3.070000000000000000 3.070000000000000000 +27 2671 92 92.0 4.35 4.35 4.350000000000000000 4.350000000000000000 +27 5831 61 61.0 8.79 8.79 8.790000000000000000 8.790000000000000000 +27 7139 59 59.0 6.17 6.17 6.170000000000000000 6.170000000000000000 +27 8167 28 28.0 38.83 38.83 38.830000000000000000 38.830000000000000000 +27 10757 15 15.0 8.7 8.7 8.700000000000000000 8.700000000000000000 +27 11441 15 15.0 14.45 14.45 14.450000000000000000 14.450000000000000000 +27 11509 65 65.0 80.34 80.34 80.340000000000000000 80.340000000000000000 +27 12237 89 89.0 73.9 73.9 73.900000000000000000 73.900000000000000000 +27 12749 31 31.0 80.27 80.27 80.270000000000000000 80.270000000000000000 +27 13885 66 66.0 40.62 40.62 40.620000000000000000 40.620000000000000000 +27 15025 26 26.0 35.56 35.56 35.560000000000000000 35.560000000000000000 +27 16029 59 59.0 2.11 2.11 2.110000000000000000 2.110000000000000000 +27 16419 65 65.0 80.1 80.1 80.100000000000000000 80.100000000000000000 +27 16767 60 60.0 68.33 68.33 68.330000000000000000 68.330000000000000000 +28 1807 98 98.0 78.91 78.91 78.910000000000000000 78.910000000000000000 +28 2817 8 8.0 98.75 98.75 98.750000000000000000 98.750000000000000000 +28 2967 29 29.0 47.87 47.87 47.870000000000000000 47.870000000000000000 +28 4483 78 78.0 73.9 73.9 73.900000000000000000 73.900000000000000000 +28 5437 15 15.0 7.49 7.49 7.490000000000000000 7.490000000000000000 +28 6411 3 3.0 67.26 67.26 67.260000000000000000 67.260000000000000000 +28 7965 93 93.0 77.74 77.74 77.740000000000000000 77.740000000000000000 +28 8043 58 58.0 60.26 60.26 60.260000000000000000 60.260000000000000000 +28 8407 14 14.0 95.01 95.01 95.010000000000000000 95.010000000000000000 +28 10295 13 13.0 31.83 31.83 31.830000000000000000 31.830000000000000000 +29 20 18 18.0 66.26 66.26 66.260000000000000000 66.260000000000000000 +29 1363 75 75.0 NULL NULL NULL NULL +29 2930 23 23.0 64.78 64.78 64.780000000000000000 64.780000000000000000 +29 3740 5 5.0 90.13 90.13 90.130000000000000000 90.130000000000000000 +29 7654 20 20.0 98.14 98.14 98.140000000000000000 98.140000000000000000 +29 9458 33 33.0 52.33 52.33 52.330000000000000000 52.330000000000000000 +29 10795 33 33.0 68.24 68.24 68.240000000000000000 68.240000000000000000 +29 12068 37 37.0 80.75 80.75 80.750000000000000000 80.750000000000000000 +29 12223 59 59.0 12.89 12.89 12.890000000000000000 12.890000000000000000 +29 13340 21 21.0 40.5 40.5 40.500000000000000000 40.500000000000000000 +29 13693 NULL NULL 95.63 95.63 95.630000000000000000 95.630000000000000000 +29 15085 40 40.0 NULL NULL NULL NULL +29 15626 NULL NULL 17.61 17.61 17.610000000000000000 17.610000000000000000 +29 15782 53 53.0 57.11 57.11 57.110000000000000000 57.110000000000000000 +30 217 91 91.0 52.03 52.03 52.030000000000000000 52.030000000000000000 +30 1951 59 59.0 17.14 
17.14 17.140000000000000000 17.140000000000000000 +30 3238 16 16.0 9.84 9.84 9.840000000000000000 9.840000000000000000 +30 3506 15 15.0 16.31 16.31 16.310000000000000000 16.310000000000000000 +30 3928 87 87.0 27.01 27.01 27.010000000000000000 27.010000000000000000 +30 5431 77 77.0 52.37 52.37 52.370000000000000000 52.370000000000000000 +30 6752 69 69.0 40.8 40.8 40.800000000000000000 40.800000000000000000 +30 7870 7 7.0 4.51 4.51 4.510000000000000000 4.510000000000000000 +30 8666 21 21.0 64.0 64.0 64.000000000000000000 64.000000000000000000 +30 12572 33 33.0 61.96 61.96 61.960000000000000000 61.960000000000000000 +30 12670 20 20.0 6.44 6.44 6.440000000000000000 6.440000000000000000 +30 13579 75 75.0 62.71 62.71 62.710000000000000000 62.710000000000000000 +30 14848 62 62.0 64.03 64.03 64.030000000000000000 64.030000000000000000 +30 17348 62 62.0 88.74 88.74 88.740000000000000000 88.740000000000000000 +30 17875 78 78.0 2.91 2.91 2.910000000000000000 2.910000000000000000 +31 913 54 54.0 79.11 79.11 79.110000000000000000 79.110000000000000000 +31 4963 67 67.0 56.37 56.37 56.370000000000000000 56.370000000000000000 +31 6617 11 11.0 86.78 86.78 86.780000000000000000 86.780000000000000000 +31 6917 4 4.0 49.76 49.76 49.760000000000000000 49.760000000000000000 +31 7513 82 82.0 44.95 44.95 44.950000000000000000 44.950000000000000000 +31 11739 95 95.0 6.99 6.99 6.990000000000000000 6.990000000000000000 +31 14575 97 97.0 59.9 59.9 59.900000000000000000 59.900000000000000000 +31 14727 41 41.0 48.1 48.1 48.100000000000000000 48.100000000000000000 +31 15341 31 31.0 16.15 16.15 16.150000000000000000 16.150000000000000000 +31 15411 53 53.0 47.64 47.64 47.640000000000000000 47.640000000000000000 +31 16251 51 51.0 91.49 91.49 91.490000000000000000 91.490000000000000000 +32 1115 61 61.0 97.03 97.03 97.030000000000000000 97.030000000000000000 +32 2095 34 34.0 89.33 89.33 89.330000000000000000 89.330000000000000000 +32 2887 8 8.0 48.71 48.71 48.710000000000000000 48.710000000000000000 +32 4339 6 6.0 88.27 88.27 88.270000000000000000 88.270000000000000000 +32 4537 22 22.0 65.72 65.72 65.720000000000000000 65.720000000000000000 +32 4808 NULL NULL 57.01 57.01 57.010000000000000000 57.010000000000000000 +32 5798 87 87.0 46.23 46.23 46.230000000000000000 46.230000000000000000 +32 7547 24 24.0 43.33 43.33 43.330000000000000000 43.330000000000000000 +32 9683 26 26.0 NULL NULL NULL NULL +32 11005 46 46.0 51.48 51.48 51.480000000000000000 51.480000000000000000 +32 11348 41 41.0 55.14 55.14 55.140000000000000000 55.140000000000000000 +32 12134 21 21.0 51.01 51.01 51.010000000000000000 51.010000000000000000 +32 15001 57 57.0 30.07 30.07 30.070000000000000000 30.070000000000000000 +32 15644 34 34.0 80.54 80.54 80.540000000000000000 80.540000000000000000 +32 16421 74 74.0 89.89 89.89 89.890000000000000000 89.890000000000000000 +32 17659 51 51.0 23.88 23.88 23.880000000000000000 23.880000000000000000 +33 4798 27 27.0 28.56 28.56 28.560000000000000000 28.560000000000000000 +33 7300 3 3.0 3.13 3.13 3.130000000000000000 3.130000000000000000 +33 9649 36 36.0 18.91 18.91 18.910000000000000000 18.910000000000000000 +33 10376 21 21.0 55.09 55.09 55.090000000000000000 55.090000000000000000 +33 11119 92 92.0 3.49 3.49 3.490000000000000000 3.490000000000000000 +33 11756 26 26.0 58.87 58.87 58.870000000000000000 58.870000000000000000 +33 12643 89 89.0 35.74 35.74 35.740000000000000000 35.740000000000000000 +33 12760 54 54.0 48.97 48.97 48.970000000000000000 48.970000000000000000 +33 12964 80 80.0 83.86 83.86 83.860000000000000000 
[several hundred added ("+"-prefixed) rows of expected query output elided: each row carries two integer key columns, an int value with its double rendering, and a second value repeated in float and decimal(20,18) forms, with NULL preserved wherever the source column was NULL]
60.100000000000000000 +80 7231 49 49.0 76.07 76.07 76.070000000000000000 76.070000000000000000 +80 7610 37 37.0 24.62 24.62 24.620000000000000000 24.620000000000000000 +80 10393 5 5.0 71.37 71.37 71.370000000000000000 71.370000000000000000 +80 12968 NULL NULL NULL NULL NULL NULL +80 13717 91 91.0 60.42 60.42 60.420000000000000000 60.420000000000000000 +80 13975 13 13.0 83.81 83.81 83.810000000000000000 83.810000000000000000 +80 16363 84 84.0 84.8 84.8 84.800000000000000000 84.800000000000000000 +80 16886 77 77.0 89.22 89.22 89.220000000000000000 89.220000000000000000 +80 17308 29 29.0 94.38 94.38 94.380000000000000000 94.380000000000000000 +80 17755 94 94.0 56.04 56.04 56.040000000000000000 56.040000000000000000 +81 4486 31 31.0 63.84 63.84 63.840000000000000000 63.840000000000000000 +81 5078 75 75.0 33.72 33.72 33.720000000000000000 33.720000000000000000 +81 5216 64 64.0 4.59 4.59 4.590000000000000000 4.590000000000000000 +81 5656 24 24.0 40.61 40.61 40.610000000000000000 40.610000000000000000 +81 7166 7 7.0 22.87 22.87 22.870000000000000000 22.870000000000000000 +81 7663 79 79.0 52.07 52.07 52.070000000000000000 52.070000000000000000 +81 8918 37 37.0 86.54 86.54 86.540000000000000000 86.540000000000000000 +81 9319 36 36.0 91.74 91.74 91.740000000000000000 91.740000000000000000 +81 11107 36 36.0 47.86 47.86 47.860000000000000000 47.860000000000000000 +81 11368 26 26.0 NULL NULL NULL NULL +81 13339 6 6.0 4.63 4.63 4.630000000000000000 4.630000000000000000 +81 15793 8 8.0 5.61 5.61 5.610000000000000000 5.610000000000000000 +82 2572 53 53.0 55.41 55.41 55.410000000000000000 55.410000000000000000 +82 7862 75 75.0 21.65 21.65 21.650000000000000000 21.650000000000000000 +82 13138 59 59.0 31.81 31.81 31.810000000000000000 31.810000000000000000 +82 14998 49 49.0 52.59 52.59 52.590000000000000000 52.590000000000000000 +82 17041 18 18.0 4.71 4.71 4.710000000000000000 4.710000000000000000 diff --git ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out index 92a6a6c..a84da2c 100644 --- ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out +++ ql/src/test/results/clientpositive/llap/vector_grouping_sets.q.out @@ -168,9 +168,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: s_store_id (type: string), 0 (type: int) mode: hash @@ -208,9 +210,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial @@ -298,9 +302,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string), 0 (type: int) mode: hash @@ -338,9 +344,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 
(type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out index efd49cd..f5cbe53 100644 --- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out @@ -257,8 +257,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 25:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -297,8 +299,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index 7346811..3e2ed6f 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -304,9 +304,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index ec76750..9fca9cd 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -164,8 +164,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -193,8 +195,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -403,8 +407,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 3:int) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -432,8 +438,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -669,8 +677,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -698,8 +708,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -929,8 +941,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(VectorUDFAdaptor(hash(_col2,_col3)) -> 2:int) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -958,8 +972,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1232,8 +1248,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1257,8 +1275,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1531,8 +1551,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1556,8 +1578,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1830,8 +1854,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1855,8 +1881,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -2129,8 +2157,10 @@ STAGE PLANS: Group By Operator aggregations: sum(hash(_col2,_col3)) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -2154,8 +2184,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index f77f0ce..c02f879 100644 --- ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -3389,8 +3389,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: 
true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -3500,8 +3502,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -3613,8 +3617,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -3721,8 +3727,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -3837,8 +3845,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -3919,8 +3929,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4030,8 +4042,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4138,8 +4152,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4274,8 +4290,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4398,8 +4416,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4531,8 +4551,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4652,8 +4674,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator native: true @@ -4771,8 +4795,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4799,8 +4825,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink 
Vectorization: className: VectorReduceSinkLongOperator native: true @@ -4932,8 +4960,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -5072,8 +5102,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -5198,8 +5230,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -5342,8 +5376,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -5488,8 +5524,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -5661,8 +5699,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkLongOperator native: true @@ -5822,8 +5862,10 @@ STAGE PLANS: native: true Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: className: VectorReduceSinkStringOperator native: true @@ -5956,9 +5998,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -6151,9 +6195,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -6348,9 +6394,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -6545,9 +6593,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -6745,9 +6795,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -6882,9 +6934,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: 
col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -7082,9 +7136,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -7279,9 +7335,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -7528,9 +7586,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -7736,9 +7796,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -7986,9 +8048,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -8191,9 +8255,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -8397,9 +8463,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -8457,9 +8525,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -8685,9 +8755,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -8908,9 +8980,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -9117,9 +9191,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -9360,9 +9436,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -9605,9 +9683,11 @@ STAGE PLANS: Group By Operator Group 
By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -9904,9 +9984,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -10171,9 +10253,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string) mode: hash @@ -10329,9 +10413,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -10527,9 +10613,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -10727,9 +10815,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -10927,9 +11017,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -11130,9 +11222,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -11267,9 +11361,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -11470,9 +11566,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -11670,9 +11768,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -11925,9 +12025,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -12136,9 +12238,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: 
false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -12386,9 +12490,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -12594,9 +12700,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -12800,9 +12908,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -12860,9 +12970,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -13088,9 +13200,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -13311,9 +13425,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -13520,9 +13636,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -13763,9 +13881,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -14008,9 +14128,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -14313,9 +14435,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -14583,9 +14707,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string) mode: hash @@ -14741,9 +14867,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -14939,9 +15067,11 @@ STAGE PLANS: Group By Operator Group By 
Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -15139,9 +15269,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -15339,9 +15471,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col1 (type: int), _col1 (type: int) mode: hash @@ -15542,9 +15676,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -15679,9 +15815,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -15882,9 +16020,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -16082,9 +16222,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -16337,9 +16479,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -16548,9 +16692,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -16798,9 +16944,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -17006,9 +17154,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: string) mode: hash @@ -17212,9 +17362,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -17272,9 +17424,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true 
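Note on the two annotations added throughout these hunks: groupByMode echoes the operator's logical GROUP BY mode (hash, mergepartial, complete), while vectorProcessingMode records how VectorGroupByOperator actually processes batches. The mapping observable in this diff is sketched below; the enums and method are illustrative stand-ins for exposition, not Hive's actual API, and only the combinations that appear in these .q.out files are handled.

    enum GroupByMode { HASH, MERGEPARTIAL, COMPLETE }
    enum VectorProcessingMode { HASH, MERGE_PARTIAL, GLOBAL }

    static VectorProcessingMode deriveProcessingMode(GroupByMode mode, boolean hasKeys) {
      switch (mode) {
        case HASH:
          // map-side hash aggregation ("mode: hash" hunks)
          return VectorProcessingMode.HASH;
        case MERGEPARTIAL:
          // reducer merging partial results: per-key merge when key
          // expressions are present, one global aggregate when absent
          return hasKeys ? VectorProcessingMode.MERGE_PARTIAL
                         : VectorProcessingMode.GLOBAL;
        case COMPLETE:
          // complete aggregation with no keys is likewise global
          return VectorProcessingMode.GLOBAL;
        default:
          throw new IllegalArgumentException("unhandled mode: " + mode);
      }
    }

For example, vector_mapjoin_reduce.q.out below shows MERGEPARTIAL with keyExpressions mapping to MERGE_PARTIAL, while the key-less MERGEPARTIAL case in vector_outer_join1.q.out maps to GLOBAL.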
keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -17500,9 +17654,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -17723,9 +17879,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -17932,9 +18090,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -18175,9 +18335,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -18420,9 +18582,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -18725,9 +18889,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -18995,9 +19161,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string) mode: hash diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index d3586e0..8aabb62 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -44,9 +44,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: l_partkey (type: int) mode: hash @@ -140,9 +142,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -180,9 +184,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -311,9 +317,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: l_partkey (type: int) mode: hash @@ -408,9 
+416,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: int) mode: hash @@ -448,9 +458,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_null_projection.q.out ql/src/test/results/clientpositive/llap/vector_null_projection.q.out index 84266a2..8c60363 100644 --- ql/src/test/results/clientpositive/llap/vector_null_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_null_projection.q.out @@ -69,7 +69,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing data type void not supported when mode = PROJECTION vectorized: false Stage: Stage-0 @@ -136,7 +136,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing data type void not supported when mode = PROJECTION vectorized: false Map 4 Map Operator Tree: @@ -163,14 +163,14 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Key expression for GROUPBY operator: Data type void of Const void null not supported + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing data type void not supported when mode = PROJECTION vectorized: false Reducer 3 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Key expression for GROUPBY operator: Data type void of Column[KEY._col0] not supported + notVectorizedReason: Key expression for GROUPBY operator: Vectorizing data type void not supported when mode = PROJECTION vectorized: false Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out index aa1b9d8..33cc940 100644 --- ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/llap/vector_number_compare_projection.q.out @@ -181,8 +181,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 6) -> bigint className: VectorGroupByOperator + groupByMode: COMPLETE vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: complete outputColumnNames: _col0 @@ -305,8 +307,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 8) -> bigint className: VectorGroupByOperator 
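The reworded notVectorizedReason in vector_null_projection.q.out above (and again in vector_nvl.q.out below), "Vectorizing data type void not supported when mode = PROJECTION", suggests the type check is now keyed off the expression mode rather than the individual expression. A minimal sketch of such a check, with a local Mode enum standing in for Hive's VectorExpressionDescriptor.Mode:

    enum Mode { FILTER, PROJECTION }

    // Returns null when the type vectorizes in the given mode, otherwise
    // the reason string surfaced as notVectorizedReason.
    static String validateDataType(String typeName, Mode mode) {
      if ("void".equalsIgnoreCase(typeName) && mode == Mode.PROJECTION) {
        return "Vectorizing data type void not supported when mode = " + mode;
      }
      return null;
    }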
+ groupByMode: COMPLETE vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: complete outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vector_nvl.q.out ql/src/test/results/clientpositive/llap/vector_nvl.q.out index aa8ed4a..3dc952c 100644 --- ql/src/test/results/clientpositive/llap/vector_nvl.q.out +++ ql/src/test/results/clientpositive/llap/vector_nvl.q.out @@ -334,7 +334,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported + notVectorizedReason: Select expression for SELECT operator: Vectorizing data type void not supported when mode = PROJECTION vectorized: false Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out index 7faf892..a99cb2b 100644 --- ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/llap/vector_orderby_5.q.out @@ -145,9 +145,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 7 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash @@ -188,9 +190,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 2857c6c..f64e739 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -736,8 +736,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -872,8 +874,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index ca38df5..c24a2d0 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -318,8 +318,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -454,8 +456,10 @@ STAGE PLANS: Group By Vectorization: aggregators: 
VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index f963a62..4683b4b 100644 --- ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -115,8 +115,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -154,8 +156,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -284,8 +288,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -323,8 +329,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -453,8 +461,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -492,8 +502,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -609,8 +621,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -648,8 +662,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -765,8 +781,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -804,8 +822,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial 
outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index e8444fc..c5f7128 100644 --- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -454,9 +454,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash @@ -497,9 +499,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial @@ -1386,9 +1390,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash @@ -1429,9 +1435,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial @@ -2342,9 +2350,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: fl_time (type: timestamp) mode: hash @@ -2385,9 +2395,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: timestamp) mode: mergepartial @@ -2882,9 +2894,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash @@ -2925,9 +2939,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial @@ -3814,9 +3830,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: fl_date (type: date) mode: hash @@ -3857,9 +3875,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL 
projectedOutputColumns: [0] keys: KEY._col0 (type: date) mode: mergepartial @@ -4770,9 +4790,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: fl_time (type: timestamp) mode: hash @@ -4813,9 +4835,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: timestamp) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out index 7348af8..579bb61 100644 --- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_decimal.q.out @@ -65,9 +65,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinDecimal(col 2) -> decimal(20,10) className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, col 2, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash @@ -109,9 +111,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinDecimal(col 4) -> decimal(20,10) className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1, col 2, col 3 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14)) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_string_concat.q.out ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index fb9e121..d5331ec 100644 --- ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -356,9 +356,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 19 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string) mode: hash @@ -397,9 +399,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out index b078068..bb89dd7 100644 --- ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out +++ ql/src/test/results/clientpositive/llap/vector_tablesample_rows.q.out @@ -1,7 +1,7 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample 
(1 rows) POSTHOOK: type: QUERY Explain @@ -56,6 +56,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: string, string Stage: Stage-0 Fetch Operator @@ -81,11 +87,11 @@ POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@decimal_2 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY @@ -144,6 +150,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(18,9) Stage: Stage-2 Dependency Collection @@ -191,10 +203,10 @@ POSTHOOK: query: drop table decimal_2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@decimal_2 POSTHOOK: Output: default@decimal_2 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y POSTHOOK: type: QUERY Explain @@ -237,10 +249,17 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint Reduce Operator Tree: Select Operator Select Vectorization: @@ -253,8 +272,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 1:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -263,8 +284,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value 
expressions: _col0 (type: bigint) Reducer 3 @@ -272,18 +295,26 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -315,10 +346,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### _c0 1 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail create temporary table dual as select 1 PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail create temporary table dual as select 1 POSTHOOK: type: CREATETABLE_AS_SELECT Explain diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 0dd278e..16edaac 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -30,20 +30,24 @@ POSTHOOK: Lineage: varchar_udf_1.d1 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1.d2 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1.d3 EXPRESSION [] POSTHOOK: Lineage: varchar_udf_1.d4 EXPRESSION [] -PREHOOK: query: explain -select +PREHOOK: query: explain vectorization detail +select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain -select +POSTHOOK: query: explain vectorization detail +select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -58,15 +62,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = UDFToString(concat(c3, c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 13] + selectExpressions: StringGroupConcatColCol(col 0, col 1) -> 8:String_Family, StringGroupConcatColCol(col 2, col 3) -> 9:String_Family, StringGroupColEqualStringGroupColumn(col 10, col 12)(children: StringGroupConcatColCol(col 0, col 1) -> 10:String_Family, CastStringGroupToString(col 11)(children: StringGroupConcatColCol(col 2, col 3) -> 11:String_Family) -> 12:String) -> 13:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE 
File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -74,6 +92,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -81,7 +113,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) @@ -89,7 +121,7 @@ from varchar_udf_1 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) @@ -98,20 +130,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238val_238 238val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -126,15 +162,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = UDFToString(upper(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 13] + selectExpressions: StringUpper(col 1) -> 8:String, StringUpper(col 3) -> 9:String, StringGroupColEqualStringGroupColumn(col 10, col 12)(children: StringUpper(col 1) -> 10:String, CastStringGroupToString(col 11)(children: StringUpper(col 3) -> 11:String) -> 12:String) -> 13:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -142,6 +192,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: 
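With the queries switched from "explain vectorization expression" to "explain vectorization detail", the output gains a per-vertex rowBatchContext section (and, on the reduce side, reduceColumnSortOrder/reduceColumnNullOrder). Below is a hypothetical formatter reproducing that block from batch metadata; the field names are copied from the output above, but the helper itself is not Hive code:

    import java.util.Arrays;

    static String describeRowBatchContext(String[] dataColumns, int[] includeColumns,
        int partitionColumnCount, String[] scratchColumnTypeNames) {
      return "rowBatchContext:\n"
          + "    dataColumnCount: " + dataColumns.length + "\n"
          + "    includeColumns: " + Arrays.toString(includeColumns) + "\n"
          + "    dataColumns: " + String.join(", ", dataColumns) + "\n"
          + "    partitionColumnCount: " + partitionColumnCount + "\n"
          + "    scratchColumnTypeNames: " + String.join(", ", scratchColumnTypeNames);
    }

For the vector_udf1 map vertex above this would print dataColumnCount: 8 with includeColumns [0, 1, 2, 3], matching the four columns the concat query actually reads.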
vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -166,20 +230,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### VAL_238 VAL_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -194,15 +262,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = UDFToString(lower(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 13] + selectExpressions: StringLower(col 1) -> 8:String, StringLower(col 3) -> 9:String, StringGroupColEqualStringGroupColumn(col 10, col 12)(children: StringLower(col 1) -> 10:String, CastStringGroupToString(col 11)(children: StringLower(col 3) -> 11:String) -> 12:String) -> 13:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -210,6 +292,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -234,20 +330,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: 
explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -262,15 +362,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: ascii(c2) (type: int), ascii(c4) (type: int), (ascii(c2) = ascii(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(ascii(c2)) -> 8:int, VectorUDFAdaptor(ascii(c4)) -> 9:int, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFAdaptor(ascii(c2)) -> 10:int, VectorUDFAdaptor(ascii(c4)) -> 11:int) -> 12:long Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -278,6 +392,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, bigint Stage: Stage-0 Fetch Operator @@ -302,20 +430,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 118 118 true -PREHOOK: query: explain -select +PREHOOK: query: explain vectorization detail +select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain -select +POSTHOOK: query: explain vectorization detail +select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -330,15 +462,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: concat_ws('|', c1, c2) 
(type: string), concat_ws('|', c3, c4) (type: string), (concat_ws('|', c1, c2) = concat_ws('|', c3, c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 8:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 10:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -346,6 +492,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -353,7 +513,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) @@ -361,7 +521,7 @@ from varchar_udf_1 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) @@ -370,20 +530,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238|val_238 238|val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -398,15 +562,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: 
decode(encode(c2,'US-ASCII'),'US-ASCII') (type: string), decode(encode(c4,'US-ASCII'),'US-ASCII') (type: string), (decode(encode(c2,'US-ASCII'),'US-ASCII') = decode(encode(c4,'US-ASCII'),'US-ASCII')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [9, 10, 13] + selectExpressions: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 8:binary) -> 9:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 8:binary) -> 10:string, StringGroupColEqualStringGroupColumn(col 11, col 12)(children: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 8:binary) -> 11:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 8:binary) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -414,6 +592,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -438,20 +630,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -466,15 +662,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: instr(c2, '_') (type: int), instr(c4, '_') (type: int), (instr(c2, '_') = instr(c4, '_')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(instr(c2, '_')) 
-> 8:int, VectorUDFAdaptor(instr(c4, '_')) -> 9:int, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFAdaptor(instr(c2, '_')) -> 10:int, VectorUDFAdaptor(instr(c4, '_')) -> 11:int) -> 12:long Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -482,6 +692,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, bigint Stage: Stage-0 Fetch Operator @@ -506,20 +730,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 4 4 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -534,15 +762,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: replace(c1, '_', c2) (type: string), replace(c3, '_', c4) (type: string), (replace(c1, '_', c2) = replace(c3, '_', c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(replace(c1, '_', c2)) -> 8:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(replace(c1, '_', c2)) -> 10:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat @@ -550,6 +792,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [0, 1, 2, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -574,20 +830,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -602,15 +862,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 8:string, VectorUDFAdaptor(reverse(c4)) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(reverse(c2)) -> 10:string, VectorUDFAdaptor(reverse(c4)) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -618,6 +892,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -642,20 +930,24 @@ POSTHOOK: type: QUERY 
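A pattern worth noting across the vector_udf1.q.out hunks: upper, lower and length compile to dedicated vectorized expressions (StringUpper, StringLower, StringLength) and leave usesVectorUDFAdaptor false, while ascii, concat_ws, decode/encode, instr, replace, reverse, next_day and months_between are wrapped in VectorUDFAdaptor and flip the flag to true. An illustrative lookup only; the map is a small sample, not Hive's real expression registry:

    import java.util.HashMap;
    import java.util.Map;

    static final Map<String, String> NATIVE_VECTOR_EXPRESSIONS = new HashMap<>();
    static {
      NATIVE_VECTOR_EXPRESSIONS.put("upper", "StringUpper");
      NATIVE_VECTOR_EXPRESSIONS.put("lower", "StringLower");
      NATIVE_VECTOR_EXPRESSIONS.put("length", "StringLength");
    }

    static String vectorExpressionFor(String udf) {
      // e.g. "ascii", "instr", "replace" fall through to the adaptor
      return NATIVE_VECTOR_EXPRESSIONS.getOrDefault(udf, "VectorUDFAdaptor");
    }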
POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 832_lav 832_lav true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -670,15 +962,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: next_day(d1, 'TU') (type: string), next_day(d4, 'WE') (type: string), (next_day(d1, 'TU') = next_day(d4, 'WE')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(next_day(d1, 'TU')) -> 8:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(next_day(d1, 'TU')) -> 10:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -686,6 +992,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [4, 7] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -710,20 +1030,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 2015-01-20 2017-01-18 false -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 
depends on stages: Stage-1 @@ -738,15 +1062,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: months_between(d1, d3) (type: double), months_between(d2, d4) (type: double), (months_between(d1, d3) = months_between(d2, d4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(months_between(d1, d3)) -> 8:double, VectorUDFAdaptor(months_between(d2, d4)) -> 9:double, DoubleColEqualDoubleColumn(col 10, col 11)(children: VectorUDFAdaptor(months_between(d1, d3)) -> 10:double, VectorUDFAdaptor(months_between(d2, d4)) -> 11:double) -> 12:long Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -754,6 +1092,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [4, 5, 6, 7] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double, bigint Stage: Stage-0 Fetch Operator @@ -778,20 +1130,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -23.90322581 -23.90322581 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -806,15 +1162,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: StringLength(col 1) -> 8:Long, StringLength(col 3) -> 9:Long, LongColEqualLongColumn(col 10, col 11)(children: StringLength(col 1) -> 10:Long, StringLength(col 3) -> 11:Long) -> 12:long 
Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -822,6 +1192,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, bigint Stage: Stage-0 Fetch Operator @@ -846,20 +1230,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 7 7 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -874,15 +1262,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: 5 (type: int), 5 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 10] + selectExpressions: ConstantVectorExpression(val 5) -> 8:long, ConstantVectorExpression(val 5) -> 9:long, ConstantVectorExpression(val 1) -> 10:long Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -890,6 +1292,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint Stage: Stage-0 Fetch Operator @@ -914,20 +1330,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 5 5 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -942,15 +1362,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: lpad(c2, 15, ' ') (type: string), lpad(c4, 15, ' ') (type: string), (lpad(c2, 15, ' ') = lpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 8:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 10:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -958,6 +1392,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -982,20 +1430,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail 
select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1010,15 +1462,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: StringLTrim(col 1) -> 8:String, StringLTrim(col 3) -> 9:String, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: StringLTrim(col 1) -> 10:String, StringLTrim(col 3) -> 11:String) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1026,6 +1492,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1050,20 +1530,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1078,15 +1562,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') 
(type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(c2 regexp 'val') -> 8:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 9:boolean, LongColEqualLongColumn(col 10, col 11)(children: VectorUDFAdaptor(c2 regexp 'val') -> 10:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 11:boolean) -> 12:long Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1094,6 +1592,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, bigint, bigint, bigint Stage: Stage-0 Fetch Operator @@ -1118,20 +1630,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### true true true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1146,15 +1662,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 8:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 10:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) 
-> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1162,6 +1692,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1186,20 +1730,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 238 238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1214,15 +1762,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 8:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 10:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: 
Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1230,6 +1792,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1254,20 +1830,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### replaced_238 replaced_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1282,15 +1862,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 8:string, VectorUDFAdaptor(reverse(c4)) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(reverse(c2)) -> 10:string, VectorUDFAdaptor(reverse(c4)) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1298,6 +1892,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, 
string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1322,20 +1930,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### 832_lav 832_lav true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1350,15 +1962,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: rpad(c2, 15, ' ') (type: string), rpad(c4, 15, ' ') (type: string), (rpad(c2, 15, ' ') = rpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 8:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 10:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1366,6 +1992,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1390,20 +2030,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 @@ -1418,15 +2062,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: StringRTrim(col 1) -> 8:String, StringRTrim(col 3) -> 9:String, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: StringRTrim(col 1) -> 10:String, StringRTrim(col 3) -> 11:String) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1434,6 +2092,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1458,18 +2130,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1484,22 +2160,50 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: sentences('See spot run. See jane run.') (type: array<array<string>>), sentences('See spot run. See jane run.') (type: array<array<string>>) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9] + selectExpressions: VectorUDFAdaptor(sentences('See spot run. See jane run.')) -> 8:array<array<string>>, VectorUDFAdaptor(sentences('See spot run.
See jane run.')) -> 9:array<array<string>> Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: array<array<string>>, array<array<string>> Stage: Stage-0 Fetch Operator @@ -1522,18 +2226,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### [["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select split(c2, '_'), split(c4, '_') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select split(c2, '_'), split(c4, '_') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1548,22 +2256,50 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: split(c2, '_') (type: array<string>), split(c4, '_') (type: array<string>) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9] + selectExpressions: VectorUDFAdaptor(split(c2, '_')) -> 8:array<string>, VectorUDFAdaptor(split(c4, '_')) -> 9:array<string> Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: array<string>, array<string> Stage: Stage-0 Fetch Operator @@ -1586,18 +2322,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### ["val","238"] ["val","238"] -PREHOOK: query: explain -select +PREHOOK: query: explain vectorization detail +select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain -select +POSTHOOK: query: explain vectorization detail +select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1612,22 +2352,50 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map<string,string>), str_to_map('a:1,b:2,c:3',',',':') (type: map<string,string>) outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9] + selectExpressions: VectorUDFAdaptor(str_to_map('a:1,b:2,c:3',',',':')) -> 8:map<string,string>, VectorUDFAdaptor(str_to_map('a:1,b:2,c:3',',',':')) -> 9:map<string,string> Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: map<string,string>, map<string,string> Stage: Stage-0 Fetch Operator @@ -1635,14 +2403,14 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select +PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### -POSTHOOK: query: select +POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':'),
str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1 limit 1 @@ -1650,20 +2418,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### {"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1678,15 +2450,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: StringSubstrColStartLen(col 1, start 0, length 3) -> 8:string, StringSubstrColStartLen(col 3, start 0, length 3) -> 9:string, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: StringSubstrColStartLen(col 1, start 0, length 3) -> 10:string, StringSubstrColStartLen(col 3, start 0, length 3) -> 11:string) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1694,6 +2480,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1718,20 +2518,24 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val val true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1 limit 1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1 limit 1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1746,15 +2550,29 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 12] + selectExpressions: StringTrim(col 1) -> 8:String, StringTrim(col 3) -> 9:String, StringGroupColEqualStringGroupColumn(col 10, col 11)(children: StringTrim(col 1) -> 10:String, StringTrim(col 3) -> 11:String) -> 12:boolean Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 1 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1762,6 +2580,20 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 + scratchColumnTypeNames: string, string, string, string, bigint Stage: Stage-0 Fetch Operator @@ -1786,18 +2618,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 true -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1830,8 +2666,19 @@ STAGE PLANS: value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reducer 2 Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) @@ -1867,18 +2714,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{3}{2}{3}{1}{0}{2}{0}{1}{0}{0}{1}{3}{2}{0}{3}"} -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select min(c2), min(c4) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select min(c2), min(c4) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1896,31 +2747,89 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3] Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(c2), min(c4) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 1) -> string, VectorUDAFMinString(col 3) -> string + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1] Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:string, VALUE._col1:varchar(20) + partitionColumnCount: 0 Reduce Operator Tree: Group By 
Operator aggregations: min(VALUE._col0), min(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMinString(col 1) -> string + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1948,18 +2857,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1 #### A masked pattern was here #### val_238 val_238 -PREHOOK: query: explain +PREHOOK: query: explain vectorization detail select max(c2), max(c4) from varchar_udf_1 PREHOOK: type: QUERY -POSTHOOK: query: explain +POSTHOOK: query: explain vectorization detail select max(c2), max(c4) from varchar_udf_1 POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1977,31 +2890,89 @@ STAGE PLANS: TableScan alias: varchar_udf_1 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3] Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(c2), max(c4) + Group By Vectorization: + aggregators: VectorUDAFMaxString(col 1) -> string, VectorUDAFMaxString(col 3) -> string + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1] Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 8 + includeColumns: [1, 3] + dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
 hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 2
+                    dataColumns: VALUE._col0:string, VALUE._col1:varchar(20)
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: max(VALUE._col0), max(VALUE._col1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxString(col 0) -> string, VectorUDAFMaxString(col 1) -> string
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 28edb6f..f137c63 100644
--- ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -58,9 +58,11 @@ STAGE PLANS:
                       Group By Vectorization:
                           aggregators: VectorUDAFCount(col 5) -> bigint
                           className: VectorGroupByOperator
+                          groupByMode: HASH
                           vectorOutput: true
                           keyExpressions: col 0
                           native: false
+                          vectorProcessingMode: HASH
                           projectedOutputColumns: [0]
                       keys: _col0 (type: string)
                       mode: hash
@@ -101,9 +103,11 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFCountMerge(col 1) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     keyExpressions: col 0
                     native: false
+                    vectorProcessingMode: MERGE_PARTIAL
                     projectedOutputColumns: [0]
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index b44e749..fba9c07 100644
--- ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -6,7 +6,7 @@ SELECT MIN(ctinyint) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ctinyint) as c1,
        MAX(ctinyint),
        COUNT(ctinyint),
@@ -52,8 +52,10 @@ STAGE PLANS:
                         Group By Vectorization:
                             aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
                             className: VectorGroupByOperator
+                            groupByMode: HASH
                             vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0, 1, 2, 3]
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3
@@ -62,8 +64,10 @@ STAGE PLANS:
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3]
                           Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
@@ -76,23 +80,36 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
         Reducer 2 
             Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                 groupByVectorOutput: true
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: VALUE._col0:tinyint, VALUE._col1:tinyint, VALUE._col2:bigint, VALUE._col3:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
                 Group By Vectorization:
                     aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: GLOBAL
                     projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -102,8 +119,10 @@ STAGE PLANS:
                   sort order: +
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumns: [1, 2, 3]
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
         Reducer 3 
@@ -111,10 +130,16 @@ STAGE PLANS:
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:tinyint, VALUE._col0:tinyint, VALUE._col1:bigint, VALUE._col2:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -160,12 +185,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64 62 9173 12288
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(ctinyint) as c1
 FROM alltypesorc
 ORDER BY c1
@@ -208,8 +233,10 @@ STAGE PLANS:
                         Group By Vectorization:
                             aggregators: VectorUDAFSumLong(col 0) -> bigint
                             className: VectorGroupByOperator
+                            groupByMode: HASH
                             vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0]
                         mode: hash
                         outputColumnNames: _col0
@@ -218,8 +245,10 @@ STAGE PLANS:
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0]
                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -232,23 +261,36 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
         Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
                 Group By Vectorization:
                     aggregators: VectorUDAFSumLong(col 0) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: GLOBAL
                     projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -258,18 +300,26 @@ STAGE PLANS:
                   sort order: +
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumns: []
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint)
@@ -375,18 +425,20 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                groupByVectorOutput: false
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-                vectorized: false
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -455,7 +507,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -463,7 +515,7 @@ SELECT MIN(cbigint) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cbigint) as c1,
        MAX(cbigint),
        COUNT(cbigint),
@@ -509,8 +561,10 @@ STAGE PLANS:
                         Group By Vectorization:
                             aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint
                             className: VectorGroupByOperator
+                            groupByMode: HASH
                             vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0, 1, 2, 3]
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3
@@ -519,8 +573,10 @@ STAGE PLANS:
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3]
                           Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
@@ -533,23 +589,36 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [3]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint, VALUE._col3:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
                 Group By Vectorization:
                     aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: GLOBAL
                     projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -559,8 +628,10 @@ STAGE PLANS:
                   sort order: +
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumns: [1, 2, 3]
                   Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
         Reducer 3 
@@ -568,10 +639,16 @@ STAGE PLANS:
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -617,12 +694,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -2147311592 2145498388 9173 12288
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cbigint) as c1
 FROM alltypesorc
 ORDER BY c1
@@ -665,8 +742,10 @@ STAGE PLANS:
                         Group By Vectorization:
                             aggregators: VectorUDAFSumLong(col 3) -> bigint
                             className: VectorGroupByOperator
+                            groupByMode: HASH
                             vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0]
                         mode: hash
                         outputColumnNames: _col0
@@ -675,8 +754,10 @@ STAGE PLANS:
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0]
                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: bigint)
             Execution mode: vectorized, llap
@@ -689,23 +770,36 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [3]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
                 Group By Vectorization:
                     aggregators: VectorUDAFSumLong(col 0) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: GLOBAL
                     projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -715,18 +809,26 @@ STAGE PLANS:
                   sort order: +
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumns: []
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: bigint)
@@ -832,18 +934,20 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                groupByVectorOutput: false
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
        Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-                vectorized: false
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -912,7 +1016,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -920,7 +1024,7 @@ SELECT MIN(cfloat) as c1,
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(cfloat) as c1,
        MAX(cfloat),
        COUNT(cfloat),
@@ -966,8 +1070,10 @@ STAGE PLANS:
                         Group By Vectorization:
                             aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint
                             className: VectorGroupByOperator
+                            groupByMode: HASH
                             vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0, 1, 2, 3]
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3
@@ -976,8 +1082,10 @@ STAGE PLANS:
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3]
                           Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
             Execution mode: vectorized, llap
@@ -990,23 +1098,36 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [4]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: VALUE._col0:float, VALUE._col1:float, VALUE._col2:bigint, VALUE._col3:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
                 Group By Vectorization:
                     aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: GLOBAL
                     projectedOutputColumns: [0, 1, 2, 3]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -1016,8 +1137,10 @@ STAGE PLANS:
                   sort order: +
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumns: [1, 2, 3]
                   Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
         Reducer 3 
@@ -1025,10 +1148,16 @@ STAGE PLANS:
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY.reducesinkkey0:float, VALUE._col0:float, VALUE._col1:bigint, VALUE._col2:bigint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint)
@@ -1074,12 +1203,12 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -64.0 79.553 9173 12288
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT SUM(cfloat) as c1
 FROM alltypesorc
 ORDER BY c1
@@ -1122,8 +1251,10 @@ STAGE PLANS:
                         Group By Vectorization:
                             aggregators: VectorUDAFSumDouble(col 4) -> double
                             className: VectorGroupByOperator
+                            groupByMode: HASH
                             vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0]
                         mode: hash
                         outputColumnNames: _col0
@@ -1132,8 +1263,10 @@ STAGE PLANS:
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0]
                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: double)
             Execution mode: vectorized, llap
@@ -1146,23 +1279,36 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [4]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
        Reducer 2 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: VALUE._col0:double
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: sum(VALUE._col0)
                 Group By Vectorization:
                     aggregators: VectorUDAFSumDouble(col 0) -> double
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: GLOBAL
                     projectedOutputColumns: [0]
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -1172,18 +1318,26 @@ STAGE PLANS:
                   sort order: +
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
+                      keyColumns: [0]
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      valueColumns: []
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3 
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 1
+                    dataColumns: KEY.reducesinkkey0:double
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: double)
@@ -1289,18 +1443,20 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                groupByVectorOutput: false
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
        Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-                vectorized: false
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
@@ -1370,7 +1526,7 @@ POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
 -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666
 WARNING: Comparing a bigint and a double may result in a loss of precision.
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -1397,7 +1553,7 @@ WHERE (((cstring2 LIKE '%b%')
            AND ((cboolean2 = 1)
                 AND (3569 = ctinyint))))
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT AVG(cbigint),
        (-(AVG(cbigint))),
        (-6432 + AVG(cbigint)),
@@ -1466,17 +1622,24 @@ STAGE PLANS:
                       Group By Operator
                         aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
                         Group By Vectorization:
-                            aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint
+                            aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint
                             className: VectorGroupByOperator
-                            vectorOutput: false
+                            groupByMode: HASH
+                            vectorOutput: true
                             native: false
+                            vectorProcessingMode: HASH
                             projectedOutputColumns: [0, 1, 2, 3, 4, 5]
-                            vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                         Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
                           Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
             Execution mode: vectorized, llap
@@ -1484,30 +1647,60 @@ STAGE PLANS:
             Map Vectorization:
                 enabled: true
                 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-                groupByVectorOutput: false
+                groupByVectorOutput: true
                 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 7, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: decimal(13,3), double
        Reducer 2 
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-                vectorized: false
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint
+                    partitionColumnCount: 0
             Reduce Operator Tree:
               Group By Operator
                 aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFVarSampFinal(col 2) -> double, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 5) -> tinyint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19]
+                      selectExpressions: DoubleColUnaryMinus(col 0) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 7:double, DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10, col 8)(children: DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12)(children: DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14, col 15)(children: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2, col 1) -> 14:double, DoubleColUnaryMinus(col 2) -> 15:double, DoubleColMultiplyDoubleColumn(col 17, col 16)(children: DoubleColUnaryMinus(col 16)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0) -> 16:double) -> 18:double, LongColUnaryMinus(col 5) -> 19:long
                   Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
                     Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index e0a4344..4699c2e 100644
--- ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -1,3 +1,178 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT VAR_POP(ctinyint),
+       (VAR_POP(ctinyint) / -26.28),
+       SUM(cfloat),
+       (-1.389 + SUM(cfloat)),
+       (SUM(cfloat) * (-1.389 + SUM(cfloat))),
+       MAX(ctinyint),
+       (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))),
+       MAX(cint),
+       (MAX(cint) * 79.553),
+       VAR_SAMP(cdouble),
+       (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))),
+       COUNT(cint),
+       (-563 % MAX(cint))
+FROM alltypesorc
+WHERE (((cdouble > ctinyint)
+        AND (cboolean2 > 0))
+       OR ((cbigint < ctinyint)
+           OR ((cint > cbigint)
+               OR (cboolean1 < 0))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT VAR_POP(ctinyint),
+       (VAR_POP(ctinyint) / -26.28),
+       SUM(cfloat),
+       (-1.389 + SUM(cfloat)),
+       (SUM(cfloat) * (-1.389 + SUM(cfloat))),
+       MAX(ctinyint),
+       (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))),
+       MAX(cint),
+       (MAX(cint) * 79.553),
+       VAR_SAMP(cdouble),
+       (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))),
+       COUNT(cint),
+       (-563 % MAX(cint))
+FROM alltypesorc
+WHERE (((cdouble > ctinyint)
+        AND (cboolean2 > 0))
+       OR ((cbigint < ctinyint)
+           OR ((cint > cbigint)
+               OR (cboolean1 < 0))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterLongColGreaterLongScalar(col 11, val 0) -> boolean) -> boolean, FilterLongColLessLongColumn(col 3, col 0)(children: col 0) -> boolean, FilterLongColGreaterLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean) -> boolean
+                    predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, cint, cfloat, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 2, 4, 5]
+                      Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 2) -> bigint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order: 
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
+                          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 2, 3, 4, 5, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct, VALUE._col5:bigint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFVarPopFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFMaxLong(col 2) -> tinyint, VectorUDAFMaxLong(col 3) -> int, VectorUDAFVarSampFinal(col 4) -> double, VectorUDAFCountMerge(col 5) -> bigint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14]
+                      selectExpressions: DoubleColDivideDoubleScalar(col 0, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 7:double, DoubleColMultiplyDoubleColumn(col 1, col 8)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColMultiplyDoubleColumn(col 1, col 8)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11, val 79.553)(children: CastLongToDecimal(col 3) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10)(children: DoubleColUnaryMinus(col 13)(children: DoubleColMultiplyDoubleColumn(col 1, col 10)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3) -> 14:long
+                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT VAR_POP(ctinyint),
        (VAR_POP(ctinyint) / -26.28),
        SUM(cfloat),
diff --git ql/src/test/results/clientpositive/llap/vectorization_10.q.out ql/src/test/results/clientpositive/llap/vectorization_10.q.out
index 9dad4c4..f06c2db 100644
--- ql/src/test/results/clientpositive/llap/vectorization_10.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_10.q.out
@@ -1,3 +1,121 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cdouble,
+       ctimestamp1,
+       ctinyint,
+       cboolean1,
+       cstring1,
+       (-(cdouble)),
+       (cdouble + csmallint),
+       ((cdouble + csmallint) % 33),
+       (-(cdouble)),
+       (ctinyint % cdouble),
+       (ctinyint % csmallint),
+       (-(cdouble)),
+       (cbigint * (ctinyint % csmallint)),
+       (9763215.5639 - (cdouble + csmallint)),
+       (-((-(cdouble))))
+FROM alltypesorc
+WHERE (((cstring2 <= '10')
+        OR ((ctinyint > cdouble)
+            AND (-5638.15 >= ctinyint)))
+       OR ((cdouble > 6981)
+           AND ((csmallint = 9763215.5639)
+                OR (cstring1 LIKE '%a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cdouble,
+       ctimestamp1,
+       ctinyint,
+       cboolean1,
+       cstring1,
+       (-(cdouble)),
+       (cdouble + csmallint),
+       ((cdouble + csmallint) % 33),
+       (-(cdouble)),
+       (ctinyint % cdouble),
+       (ctinyint % csmallint),
+       (-(cdouble)),
+       (cbigint * (ctinyint % csmallint)),
+       (9763215.5639 - (cdouble + csmallint)),
+       (-((-(cdouble))))
+FROM alltypesorc
+WHERE (((cstring2 <= '10')
+        OR ((ctinyint > cdouble)
+            AND (-5638.15 >= ctinyint)))
+       OR ((cdouble > 6981)
+           AND ((csmallint = 9763215.5639)
+                OR (cstring1 LIKE '%a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
+                    Statistics: Num rows: 5461 Data size: 1107444 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25]
+                          selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double
+                      Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 5461 Data size: 1082056 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 3, 5, 6, 7, 8, 10]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, decimal(6,2), decimal(11,4), double, double, double, double, double, bigint, bigint, bigint, double, double, double
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cdouble,
        ctimestamp1,
        ctinyint,
diff --git ql/src/test/results/clientpositive/llap/vectorization_11.q.out ql/src/test/results/clientpositive/llap/vectorization_11.q.out
index dff58da..2b8c391 100644
--- ql/src/test/results/clientpositive/llap/vectorization_11.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_11.q.out
@@ -1,3 +1,103 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cstring1,
+       cboolean1,
+       cdouble,
+       ctimestamp1,
+       (-3728 * csmallint),
+       (cdouble - 9763215.5639),
+       (-(cdouble)),
+       ((-(cdouble)) + 6981),
+       (cdouble * -5638.15)
+FROM alltypesorc
+WHERE ((cstring2 = cstring1)
+       OR ((ctimestamp1 IS NULL)
+           AND (cstring1 LIKE '%a')))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cstring1,
+       cboolean1,
+       cdouble,
+       ctimestamp1,
+       (-3728 * csmallint),
+       (cdouble - 9763215.5639),
+       (-(cdouble)),
+       ((-(cdouble)) + 6981),
+       (cdouble * -5638.15)
+FROM alltypesorc
+WHERE ((cstring2 = cstring1)
+       OR ((ctimestamp1 IS NULL)
+           AND (cstring1 LIKE '%a')))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2381474 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7, col 6) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean
+                    predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
+                    Statistics: Num rows: 6144 Data size: 1190792 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0) (type: double), (cdouble * -5638.15) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [6, 10, 5, 8, 12, 13, 14, 16, 15]
+                          selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1)(children: col 1) -> 12:long, DoubleColSubtractDoubleScalar(col 5, val 9763215.5639) -> 13:double, DoubleColUnaryMinus(col 5) -> 14:double, DoubleColAddDoubleScalar(col 15, val 6981.0)(children: DoubleColUnaryMinus(col 5) -> 15:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5, val -5638.15) -> 15:double
+                      Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [1, 5, 6, 7, 8, 10]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, double, double, double, double
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cstring1,
        cboolean1,
        cdouble,
diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out
index 6a7f69c..863a26f 100644
--- ql/src/test/results/clientpositive/llap/vectorization_12.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out
@@ -1,3 +1,243 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cbigint,
+       cboolean1,
+       cstring1,
+       ctimestamp1,
+       cdouble,
+       (-6432 * cdouble),
+       (-(cbigint)),
+       COUNT(cbigint),
+       (cbigint * COUNT(cbigint)),
+       STDDEV_SAMP(cbigint),
+       ((-6432 * cdouble) / -6432),
+       (-(((-6432 * cdouble) / -6432))),
+       AVG(cdouble),
+       (-((-6432 * cdouble))),
+       (-5638.15 + cbigint),
+       SUM(cbigint),
+       (AVG(cdouble) / (-6432 * cdouble)),
+       AVG(cdouble),
+       (-((-(((-6432 * cdouble) / -6432))))),
+       (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))),
+       STDDEV_POP(cdouble)
+FROM alltypesorc
+WHERE (((ctimestamp1 IS NULL)
+        AND ((cboolean1 >= cboolean2)
+             OR (ctinyint != csmallint)))
+       AND ((cstring1 LIKE '%a')
+            OR ((cboolean2 <= 1)
+                AND (cbigint >= csmallint))))
+GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cbigint,
+       cboolean1,
+       cstring1,
+       ctimestamp1,
+       cdouble,
+       (-6432 * cdouble),
+       (-(cbigint)),
+       COUNT(cbigint),
+       (cbigint * COUNT(cbigint)),
+       STDDEV_SAMP(cbigint),
+       ((-6432 * cdouble) / -6432),
+       (-(((-6432 * cdouble) / -6432))),
+       AVG(cdouble),
+       (-((-6432 * cdouble))),
+       (-5638.15 + cbigint),
+       SUM(cbigint),
+       (AVG(cdouble) / (-6432 * cdouble)),
+       AVG(cdouble),
+       (-((-(((-6432 * cdouble) / -6432))))),
+       (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))),
+       STDDEV_POP(cdouble)
+FROM alltypesorc
+WHERE (((ctimestamp1 IS NULL)
+        AND ((cboolean1 >= cboolean2)
+             OR (ctinyint != csmallint)))
+       AND ((cstring1 LIKE '%a')
+            OR ((cboolean2 <= 1)
+                AND (cbigint >= csmallint))))
+GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 1647554 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10, col 11) -> boolean, FilterLongColNotEqualLongColumn(col 0, col 1)(children: col 0) -> boolean) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11, val 1) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 1)(children: col 1) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean)
+                      outputColumnNames: cbigint, cdouble, cstring1, cboolean1
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [3, 5, 6, 10]
+                      Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFCount(col 3) -> bigint, VectorUDAFStdSampLong(col 3) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            keyExpressions: col 5, col 3, col 6, col 10
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4]
+                        keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                        Statistics: Num rows: 1 Data size: 370 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
+                          sort order: ++++
+                          Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkMultiKeyOperator
+                              keyColumns: [0, 1, 2, 3]
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [4, 5, 6, 7, 8]
+                          Statistics: Num rows: 1 Data size: 370 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 3, 5, 6, 8, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaaa
+                reduceColumnSortOrder: ++++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 9
+                    dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:struct
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 4) -> bigint, VectorUDAFStdSampFinal(col 5) -> double, VectorUDAFAvgFinal(col 6) -> double, VectorUDAFSumLong(col 7) -> bigint, VectorUDAFStdPopFinal(col 8) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    keyExpressions: col 0, col 1, col 2, col 3
+                    native: false
+                    vectorProcessingMode: MERGE_PARTIAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 154 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
+                      selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 9:double, LongColUnaryMinus(col 1) -> 10:long, LongColMultiplyLongColumn(col 1, col 4) -> 11:long, DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14)(children: DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16)(children: CastLongToDecimal(col 1) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6, col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14)(children: DoubleColUnaryMinus(col 19)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20, col 21)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 21:double) -> 14:double
+                  Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
+                    sort order: +++
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkObjectHashOperator
+                        keyColumns: [0, 1, 2]
+                        native: true
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        valueColumns: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
+                    Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
+        Reducer 3 
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 19
+                    dataColumns: KEY.reducesinkkey0:double, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:string, VALUE._col0:boolean, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(22,2), VALUE._col11:bigint, VALUE._col12:double, VALUE._col13:double, VALUE._col14:double, VALUE._col15:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: timestamp
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: string), null (type: timestamp), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(22,2)), VALUE._col11 (type: bigint), VALUE._col12 (type: double), VALUE._col8 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18]
+                    selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp
+                Statistics: Num rows: 1 Data
size: 386 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT cbigint, cboolean1, cstring1, diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out index 3ae67b6..d3e4309 100644 --- ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -109,11 +109,12 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 10, col 0, col 8, col 4, col 6 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -122,6 +123,12 @@ STAGE PLANS: key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: [0, 1, 2, 3, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [5, 6, 7, 8, 9, 10] Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized, llap @@ -129,21 +136,44 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 4, 5, 6, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, decimal(11,4) Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:float, VALUE._col5:tinyint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5) -> tinyint, VectorUDAFSumDouble(col 6) -> double, VectorUDAFStdPopFinal(col 7) -> double, VectorUDAFStdPopFinal(col 8) -> double, VectorUDAFMaxDouble(col 9) -> float, VectorUDAFMinLong(col 10) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3, col 4 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -151,10 +181,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] + selectExpressions: LongColUnaryMinus(col 1) -> 
11:long, LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 13:long, DoubleColMultiplyDoubleColumn(col 6, col 15)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3) -> 17:double, DoubleColUnaryMinus(col 6) -> 18:double, DecimalColSubtractDecimalScalar(col 19, val 10.175)(children: CastLongToDecimal(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23)(children: DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24, col 23)(children: DoubleColMultiplyDoubleColumn(col 6, col 23)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 23:double) -> 24:double, CastLongToDouble(col 1) -> 23:double) -> 25:double Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -162,10 +203,16 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaaaaaaaaaaaaaaaaaaaa + reduceColumnSortOrder: +++++++++++++++++++++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 21 + dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:tinyint, KEY.reducesinkkey2:timestamp, KEY.reducesinkkey3:float, KEY.reducesinkkey4:string, KEY.reducesinkkey5:tinyint, KEY.reducesinkkey6:tinyint, KEY.reducesinkkey7:tinyint, KEY.reducesinkkey8:double, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:double, KEY.reducesinkkey13:double, KEY.reducesinkkey14:double, KEY.reducesinkkey15:decimal(7,3), KEY.reducesinkkey16:double, KEY.reducesinkkey17:double, KEY.reducesinkkey18:float, KEY.reducesinkkey19:double, KEY.reducesinkkey20:tinyint + 
partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) @@ -417,11 +464,12 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 10, col 0, col 8, col 4, col 6 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -430,6 +478,10 @@ STAGE PLANS: key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized, llap @@ -437,21 +489,32 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported - vectorized: false + groupByVectorOutput: true + 
allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5) -> tinyint, VectorUDAFSumDouble(col 6) -> double, VectorUDAFStdPopFinal(col 7) -> double, VectorUDAFStdPopFinal(col 8) -> double, VectorUDAFMaxDouble(col 9) -> float, VectorUDAFMinLong(col 10) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3, col 4 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -459,10 +522,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] + selectExpressions: LongColUnaryMinus(col 1) -> 11:long, LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 13:long, DoubleColMultiplyDoubleColumn(col 6, col 15)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3) -> 17:double, DoubleColUnaryMinus(col 6) -> 18:double, DecimalColSubtractDecimalScalar(col 19, val 10.175)(children: CastLongToDecimal(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23)(children: DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24, col 23)(children: DoubleColMultiplyDoubleColumn(col 6, col 23)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 23:double) -> 24:double, CastLongToDouble(col 1) -> 23:double) -> 25:double Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 541d13f..418e4ea 100644 --- ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cfloat, cstring1, @@ -86,15 +86,36 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterTimestampColLessTimestampColumn(col 9, col 8) -> boolean) -> boolean, FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3, val -257) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean) Statistics: Num rows: 606 Data size: 105558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 4, 6, 10, 5, 13] + selectExpressions: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val 
-26.28, col 5) -> 12:double) -> 13:double Statistics: Num rows: 606 Data size: 105558 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + Group By Vectorization: + aggregators: VectorUDAFStdSampDouble(col 13) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFVarSampDouble(col 4) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 6, col 4, col 5, col 8, col 10 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -103,6 +124,12 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: [0, 1, 2, 3, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [5, 6, 7, 8, 9, 10] Statistics: Num rows: 303 Data size: 137686 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized, llap @@ -110,21 +137,44 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, double Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:struct, VALUE._col1:float, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:struct, VALUE._col5:struct + partitionColumnCount: 0 
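The rowBatchContext above is the crux of the vectorization_14 change: VALUE._col0, VALUE._col2, VALUE._col4, and VALUE._col5 now reach the reducer as struct-typed partial aggregates, the same columns that previously forced row mode with a notVectorizedReason of "Data type struct ... not supported". A minimal way to confirm the reduce side vectorizes is sketched below; it is an illustrative session against the standard alltypesorc test table, not part of the patch, with the SET commands pinning the conditions the plans above depend on.

SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.reduce.enabled=true;
EXPLAIN VECTORIZATION DETAIL
SELECT cstring1, STDDEV_SAMP(cdouble), VAR_POP(cfloat), MIN(cdouble)
FROM alltypesorc
GROUP BY cstring1;
-- Expected: the reducer reports "Execution mode: vectorized, llap" and a
-- Group By Vectorization block with vectorProcessingMode: MERGE_PARTIAL
-- instead of a notVectorizedReason.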
Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFStdSampFinal(col 5) -> double, VectorUDAFMaxDouble(col 6) -> float, VectorUDAFStdPopFinal(col 7) -> double, VectorUDAFCountMerge(col 8) -> bigint, VectorUDAFVarPopFinal(col 9) -> double, VectorUDAFVarSampFinal(col 10) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3, col 4 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -132,10 +182,21 @@ STAGE PLANS: Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 0, 4, 2, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22] + selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 11:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1, val -26.280000686645508) -> 12:double, DoubleColUnaryMinus(col 1) -> 14:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleColDivideDoubleScalar(col 17, val 10.175)(children: DoubleColUnaryMinus(col 16)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 16:double) -> 17:double) -> 16:double, DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleScalar(col 18, val 10.175)(children: DoubleColUnaryMinus(col 17)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 17:double) -> 18:double) -> 17:double) -> 18:double, DoubleScalarModuloDoubleColumn(val -1.389, col 5) -> 17:double, DoubleColSubtractDoubleColumn(col 1, col 2)(children: col 1) -> 19:double, DoubleColModuloDoubleScalar(col 9, val 10.175) -> 20:double, DoubleColUnaryMinus(col 21)(children: DoubleColSubtractDoubleColumn(col 1, col 2)(children: col 1) -> 21:double) -> 22:double Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3] + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22] Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 @@ -143,17 +204,30 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 22 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:float, KEY.reducesinkkey2:double, KEY.reducesinkkey3:timestamp, VALUE._col0:boolean, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:float, VALUE._col5:float, VALUE._col6:float, VALUE._col7:float, VALUE._col8:double, VALUE._col9:double, VALUE._col10:bigint, VALUE._col11:double, VALUE._col12:double, VALUE._col13:double, VALUE._col14:double, VALUE._col15:double, VALUE._col16:double, VALUE._col17:double + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 766904e..79c7084 100644 --- ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION 
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cboolean1, cdouble, @@ -29,7 +29,7 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cboolean1, cdouble, @@ -82,15 +82,35 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean, FilterStringColLikeStringScalar(col 6, pattern 10%) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -75) -> boolean, FilterLongColEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 5, val -3728.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean) Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 4, 5, 6, 8, 10] Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + Group By Vectorization: + aggregators: VectorUDAFStdSampDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 4, col 10, col 5, col 6, col 0, col 2, col 8 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -99,6 +119,12 @@ STAGE PLANS: key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: [0, 1, 2, 3, 4, 5, 6] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF
TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [7, 8, 9, 10, 11, 12] Statistics: Num rows: 6144 Data size: 3293884 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized, llap @@ -106,21 +132,31 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: llap Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + enabled: false + enableConditionsMet: hive.execution.engine tez IN [tez, spark] IS true + enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -135,14 +171,11 @@ STAGE PLANS: Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + enabled: false + enableConditionsMet: hive.execution.engine tez IN [tez, spark] IS true + enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: 
timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out index 686b16c..d961af2 100644 --- ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -59,15 +59,35 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) outputColumnNames: cdouble, cstring1, ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 6, 8] Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 5, col 6, col 8 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -76,6 +96,12 @@ STAGE PLANS: key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) sort order: +++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [3, 4, 5] Statistics: Num rows: 2048 Data 
size: 434588 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized, llap @@ -83,21 +109,43 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [5, 6, 7, 8] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:double + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFStdSampFinal(col 4) -> double, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -105,9 +153,17 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4] + selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6) Statistics: Num rows: 1024 Data size: 307406 
Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 735c015..1c07962 100644 --- ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cstring1, cint, @@ -67,16 +67,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 1647550 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) Statistics: Num rows: 4778 Data size: 640688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17] + selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 
17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double
                    Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col5 (type: bigint), _col0 (type: float)
                      sort order: ++
+                     Reduce Sink Vectorization:
+                         className: VectorReduceSinkObjectHashOperator
+                         keyColumns: [3, 4]
+                         native: true
+                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                         valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17]
                      Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
            Execution mode: vectorized, llap
@@ -89,22 +107,41 @@ STAGE PLANS:
                allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 12
+                   includeColumns: [0, 1, 2, 3, 4, 5, 6, 8]
+                   dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: aa
+               reduceColumnSortOrder: ++
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 14
+                   dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+               Select Vectorization:
+                   className: VectorSelectOperator
+                   native: true
+                   projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13]
                Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
+                 File Sink Vectorization:
+                     className: VectorFileSinkOperator
+                     native: false
                  Statistics: Num rows: 4778 Data size: 1414848 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out
index 709a75f..affd1b6 100644
--- ql/src/test/results/clientpositive/llap/vectorization_2.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out
@@ -1,3 +1,182 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(csmallint),
+       (AVG(csmallint) % -563),
+       (AVG(csmallint) + 762),
+       SUM(cfloat),
+       VAR_POP(cbigint),
+       (-(VAR_POP(cbigint))),
+       (SUM(cfloat) - AVG(csmallint)),
+       COUNT(*),
+       (-((SUM(cfloat) - AVG(csmallint)))),
+       (VAR_POP(cbigint) - 762),
+       MIN(ctinyint),
+       ((-(VAR_POP(cbigint))) + MIN(ctinyint)),
+       AVG(cdouble),
+       (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat))
+FROM alltypesorc
+WHERE (((ctimestamp1 < ctimestamp2)
+        AND ((cstring2 LIKE 'b%')
+             AND (cfloat <= -5638.15)))
+       OR ((cdouble < ctinyint)
+           AND ((-10669 != ctimestamp2)
+                OR (359 > cint))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(csmallint),
+       (AVG(csmallint) % -563),
+       (AVG(csmallint) + 762),
+       SUM(cfloat),
+       VAR_POP(cbigint),
+       (-(VAR_POP(cbigint))),
+       (SUM(cfloat) - AVG(csmallint)),
+       COUNT(*),
+       (-((SUM(cfloat) - AVG(csmallint)))),
+       (VAR_POP(cbigint) - 762),
+       MIN(ctinyint),
+       ((-(VAR_POP(cbigint))) + MIN(ctinyint)),
+       AVG(cdouble),
+       (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat))
+FROM alltypesorc
+WHERE (((ctimestamp1 < ctimestamp2)
+        AND ((cstring2 LIKE 'b%')
+             AND (cfloat <= -5638.15)))
+       OR ((cdouble < ctinyint)
+           AND ((-10669 != ctimestamp2)
+                OR (359 > cint))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2157324 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8, col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern b%) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarNotEqualDoubleColumn(val -10669.0, col 12)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterLongScalarGreaterLongColumn(val 359, col 2) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((-10669.0 <> UDFToDouble(ctimestamp2)) or (359 > cint)))) (type: boolean)
+                    Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double)
+                      outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 3, 4, 5]
+                      Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFAvgLong(col 1) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFAvgDouble(col 5) -> struct
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order:
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
+                          Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder:
+                reduceColumnSortOrder:
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:tinyint, VALUE._col5:struct
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFVarPopFinal(col 2) -> double, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFMinLong(col 4) -> tinyint, VectorUDAFAvgFinal(col 5) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 7, 1, 2, 8, 9, 3, 11, 10, 4, 14, 5, 12]
+                      selectExpressions: DoubleColModuloDoubleScalar(col 0, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0, val 762.0) -> 7:double, DoubleColUnaryMinus(col 2) -> 8:double, DoubleColSubtractDoubleColumn(col 1, col 0) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColSubtractDoubleColumn(col 1, col 0) -> 10:double) -> 11:double, DoubleColSubtractDoubleScalar(col 2, val 762.0) -> 10:double, DoubleColAddDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 2) -> 12:double, CastLongToDouble(col 4) -> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15, col 1)(children: DoubleColAddDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 2) -> 12:double, CastLongToDouble(col 4) -> 13:double) -> 15:double) -> 12:double
+                  Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT AVG(csmallint),
        (AVG(csmallint) % -563),
        (AVG(csmallint) + 762),
diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out
index 2398dee..4154746 100644
--- ql/src/test/results/clientpositive/llap/vectorization_3.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out
@@ -1,4 +1,188 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT STDDEV_SAMP(csmallint),
+       (STDDEV_SAMP(csmallint) - 10.175),
+       STDDEV_POP(ctinyint),
+       (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)),
+       (-(STDDEV_POP(ctinyint))),
+       (STDDEV_SAMP(csmallint) % 79.553),
+       (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))),
+       STDDEV_SAMP(cfloat),
+       (-(STDDEV_SAMP(csmallint))),
+       SUM(cfloat),
+       ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)),
+       (-((STDDEV_SAMP(csmallint) - 10.175))),
+       AVG(cint),
+       (-3728 - STDDEV_SAMP(csmallint)),
+       STDDEV_POP(cint),
+       (AVG(cint) / STDDEV_SAMP(cfloat))
+FROM alltypesorc
+WHERE (((cint <= cfloat)
+        AND ((79.553 != cbigint)
+             AND (ctimestamp2 = -29071)))
+       OR ((cbigint > cdouble)
+           AND ((79.553 <= csmallint)
+                AND (ctimestamp1 > ctimestamp2))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT STDDEV_SAMP(csmallint),
+       (STDDEV_SAMP(csmallint) - 10.175),
+       STDDEV_POP(ctinyint),
+       (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)),
+       (-(STDDEV_POP(ctinyint))),
+       (STDDEV_SAMP(csmallint) % 79.553),
+       (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))),
+       STDDEV_SAMP(cfloat),
+       (-(STDDEV_SAMP(csmallint))),
+       SUM(cfloat),
+       ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)),
+       (-((STDDEV_SAMP(csmallint) - 10.175))),
+       AVG(cint),
+       (-3728 - STDDEV_SAMP(csmallint)),
+       STDDEV_POP(cint),
+       (AVG(cint) / STDDEV_SAMP(cfloat))
+FROM alltypesorc
+WHERE (((cint <= cfloat)
+        AND ((79.553 != cbigint)
+             AND (ctimestamp2 = -29071)))
+       OR ((cbigint > cdouble)
+           AND ((79.553 <= csmallint)
+                AND (ctimestamp1 > ctimestamp2))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 1276620 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -29071.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 1) -> 14:decimal(8,3)) -> boolean, FilterTimestampColGreaterTimestampColumn(col 8, col 9) -> boolean) -> boolean) -> boolean
+                    predicate: (((UDFToFloat(cint) <= cfloat) and (79.553 <> CAST( cbigint AS decimal(22,3))) and (UDFToDouble(ctimestamp2) = -29071.0)) or ((UDFToDouble(cbigint) > cdouble) and (79.553 <= CAST( csmallint AS decimal(8,3))) and (ctimestamp1 > ctimestamp2))) (type: boolean)
+                    Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float)
+                      outputColumnNames: ctinyint, csmallint, cint, cfloat
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2, 4]
+                      Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFStdSampDouble(col 4) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order:
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4, 5]
+                          Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 8, 9]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double, decimal(22,3), decimal(8,3)
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder:
+                reduceColumnSortOrder:
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 6
+                    dataColumns: VALUE._col0:struct, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:double, VALUE._col4:struct, VALUE._col5:struct
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5)
+                Group By Vectorization:
+                    aggregators: VectorUDAFStdSampFinal(col 0) -> double, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFStdSampFinal(col 2) -> double, VectorUDAFSumDouble(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 6, 1, 8, 7, 9, 10, 2, 11, 3, 14, 13, 4, 12, 5, 15]
+                      selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0, col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 1) -> 7:double, DoubleColModuloDoubleScalar(col 0, val 79.553) -> 9:double, DoubleColUnaryMinus(col 11)(children: DoubleColMultiplyDoubleColumn(col 0, col 10)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 0) -> 11:double, DoubleColDivideDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 13)(children: DoubleColMultiplyDoubleColumn(col 0, col 12)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 12:double) -> 13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 12)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 12:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 0) -> 12:double, DoubleColDivideDoubleColumn(col 4, col 2) -> 15:double
+                  Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
 PREHOOK: query: SELECT STDDEV_SAMP(csmallint),
        (STDDEV_SAMP(csmallint) - 10.175),
        STDDEV_POP(ctinyint),
diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out
index 0d6829f..0b2adad 100644
--- ql/src/test/results/clientpositive/llap/vectorization_4.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out
@@ -1,3 +1,181 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT SUM(cint),
+       (SUM(cint) * -563),
+       (-3728 + SUM(cint)),
+       STDDEV_POP(cdouble),
+       (-(STDDEV_POP(cdouble))),
+       AVG(cdouble),
+       ((SUM(cint) * -563) % SUM(cint)),
+       (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)),
+       VAR_POP(cdouble),
+       (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))),
+       ((-3728 + SUM(cint)) - (SUM(cint) * -563)),
+       MIN(ctinyint),
+       MIN(ctinyint),
+       (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))))
+FROM alltypesorc
+WHERE (((csmallint >= cint)
+        OR ((-89010 >= ctinyint)
+            AND (cdouble > 79.553)))
+       OR ((-563 != cbigint)
+           AND ((ctinyint != cbigint)
+                OR (-3728 >= cdouble))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT SUM(cint),
+       (SUM(cint) * -563),
+       (-3728 + SUM(cint)),
+       STDDEV_POP(cdouble),
+       (-(STDDEV_POP(cdouble))),
+       AVG(cdouble),
+       ((SUM(cint) * -563) % SUM(cint)),
+       (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)),
+       VAR_POP(cdouble),
+       (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))),
+       ((-3728 + SUM(cint)) - (SUM(cint) * -563)),
+       MIN(ctinyint),
+       MIN(ctinyint),
+       (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))))
+FROM alltypesorc
+WHERE (((csmallint >= cint)
+        OR ((-89010 >= ctinyint)
+            AND (cdouble > 79.553)))
+       OR ((-563 != cbigint)
+           AND ((ctinyint != cbigint)
+                OR (-3728 >= cdouble))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterLongScalarGreaterEqualLongColumn(val -89010, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterDoubleScalar(col 5, val 79.553) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -563, col 3) -> boolean, FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val -3728.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: ((UDFToInteger(csmallint) >= cint) or ((-89010 >= UDFToInteger(ctinyint)) and (cdouble > 79.553)) or ((-563 <> cbigint) and ((UDFToLong(ctinyint) <> cbigint) or (-3728.0 >= cdouble)))) (type: boolean)
+                    Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double)
+                      outputColumnNames: ctinyint, cint, cdouble
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 2, 5]
+                      Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFMinLong(col 0) -> tinyint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order:
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4]
+                          Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 5]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder:
+                reduceColumnSortOrder:
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:tinyint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFAvgFinal(col 2) -> double, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFMinLong(col 4) -> tinyint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 5, 6, 1, 7, 2, 9, 12, 3, 11, 14, 4, 4, 16]
+                      selectExpressions: LongColMultiplyLongScalar(col 0, val -563) -> 5:long, LongScalarAddLongColumn(val -3728, col 0) -> 6:long, DoubleColUnaryMinus(col 1) -> 7:double, LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 9:long, DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 13)(children: DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8, col 10)(children: LongScalarAddLongColumn(val -3728, col 0) -> 8:long, LongColMultiplyLongScalar(col 0, val -563) -> 10:long) -> 14:long, DoubleColMultiplyDoubleColumn(col 13, col 15)(children: CastLongToDouble(col 4) -> 13:double, DoubleColUnaryMinus(col 16)(children: DoubleColDivideDoubleColumn(col 15, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 15:double) -> 16:double) -> 15:double) -> 16:double
+                  Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT SUM(cint),
        (SUM(cint) * -563),
        (-3728 + SUM(cint)),
diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out
index 914a626..2300780 100644
--- ql/src/test/results/clientpositive/llap/vectorization_5.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out
@@ -1,3 +1,176 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM alltypesorc
+WHERE (((cboolean2 IS NOT NULL)
+        AND (cstring1 LIKE '%b%'))
+       OR ((ctinyint = cdouble)
+           AND ((ctimestamp2 IS NOT NULL)
+                AND (cstring2 LIKE 'a'))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT MAX(csmallint),
+       (MAX(csmallint) * -75),
+       COUNT(*),
+       ((MAX(csmallint) * -75) / COUNT(*)),
+       (6981 * MAX(csmallint)),
+       MIN(csmallint),
+       (-(MIN(csmallint))),
+       (197 % ((MAX(csmallint) * -75) / COUNT(*))),
+       SUM(cint),
+       MAX(ctinyint),
+       (-(MAX(ctinyint))),
+       ((-(MAX(ctinyint))) + MAX(ctinyint))
+FROM alltypesorc
+WHERE (((cboolean2 IS NOT NULL)
+        AND (cstring1 LIKE '%b%'))
+       OR ((ctinyint = cdouble)
+           AND ((ctimestamp2 IS NOT NULL)
+                AND (cstring2 LIKE 'a'))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2454862 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %b%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern a) -> boolean) -> boolean) -> boolean
+                    predicate: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean)
+                    Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
+                      outputColumnNames: ctinyint, csmallint, cint
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [0, 1, 2]
+                      Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
+                        Group By Vectorization:
+                            aggregators: VectorUDAFMaxLong(col 1) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1) -> smallint, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint
+                            className: VectorGroupByOperator
+                            groupByMode: HASH
+                            vectorOutput: true
+                            native: false
+                            vectorProcessingMode: HASH
+                            projectedOutputColumns: [0, 1, 2, 3, 4]
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          sort order:
+                          Reduce Sink Vectorization:
+                              className: VectorReduceSinkEmptyKeyOperator
+                              keyColumns: []
+                              native: true
+                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                              valueColumns: [0, 1, 2, 3, 4]
+                          Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 5, 6, 7, 9, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: double
+        Reducer 2
+            Execution mode: vectorized, llap
+            Reduce Vectorization:
+                enabled: true
+                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+                reduceColumnNullOrder:
+                reduceColumnSortOrder:
+                groupByVectorOutput: true
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    dataColumns: VALUE._col0:smallint, VALUE._col1:bigint, VALUE._col2:smallint, VALUE._col3:bigint, VALUE._col4:tinyint
+                    partitionColumnCount: 0
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFMaxLong(col 0) -> smallint, VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFMaxLong(col 4) -> tinyint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumns: [0, 1, 2, 3, 4]
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), (- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                  Select Vectorization:
+                      className: VectorSelectOperator
+                      native: true
+                      projectedOutputColumns: [0, 5, 1, 9, 6, 2, 10, 7, 3, 4, 11, 14]
+                      selectExpressions: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 5:long, DoubleColDivideDoubleColumn(col 7, col 8)(children: CastLongToDouble(col 6)(children: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 6:long) -> 7:double, CastLongToDouble(col 1) -> 8:double) -> 9:double, LongScalarMultiplyLongColumn(val 6981, col 0)(children: col 0) -> 6:long, LongColUnaryMinus(col 2) -> 10:long, DoubleScalarModuloDoubleColumn(val 197.0, col 12)(children: DoubleColDivideDoubleColumn(col 7, col 8)(children: CastLongToDouble(col 11)(children: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 11:long) -> 7:double, CastLongToDouble(col 1) -> 8:double) -> 12:double) -> 7:double, LongColUnaryMinus(col 4) -> 11:long, LongColAddLongColumn(col 13, col 4)(children: LongColUnaryMinus(col 4) -> 13:long) -> 14:long
+                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    File Sink Vectorization:
+                        className: VectorFileSinkOperator
+                        native: false
+                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT MAX(csmallint),
        (MAX(csmallint) * -75),
        COUNT(*),
diff --git ql/src/test/results/clientpositive/llap/vectorization_6.q.out ql/src/test/results/clientpositive/llap/vectorization_6.q.out
index 13897f6..33814f2 100644
--- ql/src/test/results/clientpositive/llap/vectorization_6.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_6.q.out
@@ -1,3 +1,115 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cboolean1,
+       cfloat,
+       cstring1,
+       (988888 * csmallint),
+       (-(csmallint)),
+       (-(cfloat)),
+       (-26.28 / cfloat),
+       (cfloat * 359),
+       (cint % ctinyint),
+       (-(cdouble)),
+       (ctinyint - -75),
+       (762 * (cint % ctinyint))
+FROM alltypesorc
+WHERE ((ctinyint != 0)
+       AND ((((cboolean1 <= 0)
+              AND (cboolean2 >= cboolean1))
+             OR ((cbigint IS NOT NULL)
+                 AND ((cstring2 LIKE '%a')
+                      OR (cfloat <= -257))))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cboolean1,
+       cfloat,
+       cstring1,
+       (988888 * csmallint),
+       (-(csmallint)),
+       (-(cfloat)),
+       (-26.28 / cfloat),
+       (cfloat * 359),
+       (cint % ctinyint),
+       (-(cdouble)),
+       (ctinyint - -75),
+       (762 * (cint % ctinyint))
+FROM alltypesorc
+WHERE ((ctinyint != 0)
+       AND ((((cboolean1 <= 0)
+              AND (cboolean2 >= cboolean1))
+             OR ((cbigint IS NOT NULL)
+                 AND ((cstring2 LIKE '%a')
+                      OR (cfloat <= -257))))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc
+                  Statistics: Num rows: 12288 Data size: 2110130 Basic stats: COMPLETE Column stats: COMPLETE
+                  TableScan Vectorization:
+                      native: true
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+                  Filter Operator
+                    Filter Vectorization:
+                        className: VectorFilterOperator
+                        native: true
+                        predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10, val 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 11, col 10) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 3) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %a) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -257.0) -> boolean) -> boolean) -> boolean) -> boolean) -> boolean
+                    predicate: ((ctinyint <> 0) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean)
+                    Statistics: Num rows: 5951 Data size: 1022000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28 / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int)
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+                      Select Vectorization:
+                          className: VectorSelectOperator
+                          native: true
+                          projectedOutputColumns: [10, 4, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21]
+                          selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1)(children: col 1) -> 12:long, LongColUnaryMinus(col 1) -> 13:long, DoubleColUnaryMinus(col 4) -> 14:double, DoubleScalarDivideDoubleColumn(val -26.28, col 4)(children: col 4) -> 15:double, DoubleColMultiplyDoubleScalar(col 4, val 359.0) -> 16:double, LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 17:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColSubtractLongScalar(col 0, val -75)(children: col 0) -> 19:long, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 20:long) -> 21:long
+                      Statistics: Num rows: 5951 Data size: 715128 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        File Sink Vectorization:
+                            className: VectorFileSinkOperator
+                            native: false
+                        Statistics: Num rows: 5951 Data size: 715128 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+            Map Vectorization:
+                enabled: true
+                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+                groupByVectorOutput: true
+                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 12
+                    includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 10, 11]
+                    dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: bigint, bigint, double, double, double, bigint, double, bigint, bigint, bigint
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cboolean1,
        cfloat,
        cstring1,
diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index ba49bed..6c32ccf 100644
--- ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -25,7 +25,7 @@ WHERE ((ctinyint != 0)
 ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 25
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
        cbigint,
        csmallint,
@@ -97,8 +97,10 @@ STAGE PLANS:
                        sort order: +++++++++++++++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
+                           keyColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           valueColumns: []
                        Statistics: Num rows: 7281 Data size: 1231410 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
            Execution mode: vectorized, llap
@@ -111,15 +113,27 @@ STAGE PLANS:
                allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 12
+                   includeColumns: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10]
+                   dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: aaaaaaaaaaaaaaa
+               reduceColumnSortOrder: +++++++++++++++
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 15
+                   dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:smallint, KEY.reducesinkkey3:tinyint, KEY.reducesinkkey4:timestamp, KEY.reducesinkkey5:string, KEY.reducesinkkey6:bigint, KEY.reducesinkkey7:int, KEY.reducesinkkey8:smallint, KEY.reducesinkkey9:tinyint, KEY.reducesinkkey10:int, KEY.reducesinkkey11:bigint, KEY.reducesinkkey12:int, KEY.reducesinkkey13:tinyint, KEY.reducesinkkey14:tinyint
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint)
diff --git ql/src/test/results/clientpositive/llap/vectorization_8.q.out ql/src/test/results/clientpositive/llap/vectorization_8.q.out
index 9e9f2c7..0d5b6d5 100644
--- ql/src/test/results/clientpositive/llap/vectorization_8.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_8.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -23,7 +23,7 @@ WHERE (((cstring2 IS NOT NULL)
 ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9
 LIMIT 20
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
        cdouble,
        cboolean1,
@@ -93,8 +93,10 @@ STAGE PLANS:
                        sort order: ++++++++++++++
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkObjectHashOperator
+                           keyColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21]
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                           valueColumns: []
                        Statistics: Num rows: 3060 Data size: 557456 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
            Execution mode: vectorized, llap
@@ -107,15 +109,27 @@ STAGE PLANS:
                allNative: true
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 12
+                   includeColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10]
+                   dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                   partitionColumnCount: 0
+                   scratchColumnTypeNames: double, double, double, double, double, double, double, double, double, double, double
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               reduceColumnNullOrder: aaaaaaaaaaaaaa
+               reduceColumnSortOrder: ++++++++++++++
                groupByVectorOutput: true
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 14
+                   dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:double, KEY.reducesinkkey2:boolean, KEY.reducesinkkey3:string, KEY.reducesinkkey4:float, KEY.reducesinkkey5:double, KEY.reducesinkkey6:double, KEY.reducesinkkey7:double, KEY.reducesinkkey8:float, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:float, KEY.reducesinkkey13:double
+                   partitionColumnCount: 0
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double)
diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out
index 686b16c..d961af2 100644
--- ql/src/test/results/clientpositive/llap/vectorization_9.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%')
        OR (cstring1 < 'a')))
 GROUP BY cstring1, cdouble, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -59,15 +59,35 @@ STAGE PLANS:
                TableScan
                  alias: alltypesorc
                  Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE
+                 TableScan Vectorization:
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
                  Filter Operator
+                   Filter Vectorization:
+                       className: VectorFilterOperator
+                       native: true
+                       predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean
                    predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
                    Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                      outputColumnNames: cdouble, cstring1, ctimestamp1
+                     Select Vectorization:
+                         className: VectorSelectOperator
+                         native: true
+                         projectedOutputColumns: [5, 6, 8]
                      Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
+                       Group By Vectorization:
+                           aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFMinDouble(col 5) -> double
+                           className: VectorGroupByOperator
+                           groupByMode: HASH
+                           vectorOutput: true
+                           keyExpressions: col 5, col 6, col 8
+                           native: false
+                           vectorProcessingMode: HASH
+                           projectedOutputColumns: [0, 1, 2]
                        keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                        mode: hash
                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -76,6 +96,12 @@ STAGE PLANS:
                          key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                          sort order: +++
                          Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                         Reduce Sink Vectorization:
+                             className: VectorReduceSinkMultiKeyOperator
+                             keyColumns: [0, 1, 2]
+                             native: true
+                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                             valueColumns: [3, 4, 5]
                          Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
            Execution mode: vectorized, llap
@@ -83,21 +109,43 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 12
+                   includeColumns: [5, 6, 7, 8]
+                   dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+                   partitionColumnCount: 0
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported
-               vectorized: false
+               reduceColumnNullOrder: aaa
+               reduceColumnSortOrder: +++
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
+               rowBatchContext:
+                   dataColumnCount: 6
+                   dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:double
+ partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFStdSampFinal(col 4) -> double, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -105,9 +153,17 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4] + selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6) Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorization_limit.q.out ql/src/test/results/clientpositive/llap/vectorization_limit.q.out index 99d84fe..c7fdb65 100644 --- ql/src/test/results/clientpositive/llap/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/llap/vectorization_limit.q.out @@ -260,13 +260,14 @@ STAGE PLANS: Group By Operator aggregations: avg(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgDouble(col 12) -> struct + aggregators: VectorUDAFAvgDouble(col 12) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 @@ -275,6 +276,13 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0] + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0] + valueColumns: [1] Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) @@ -283,7 +291,7 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -295,24 +303,47 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: double Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY._col0:tinyint, VALUE._col0:struct + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 1) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -394,9 +425,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ctinyint (type: tinyint) mode: hash @@ -448,9 +481,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: tinyint) mode: mergepartial @@ -548,9 +583,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash @@ -602,9 +639,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: 
@@ -394,9 +425,11 @@ STAGE PLANS:
               Group By Operator
                 Group By Vectorization:
                     className: VectorGroupByOperator
+                    groupByMode: HASH
                     vectorOutput: true
                     keyExpressions: col 0
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: []
                 keys: ctinyint (type: tinyint)
                 mode: hash
@@ -448,9 +481,11 @@ STAGE PLANS:
               Group By Operator
                 Group By Vectorization:
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     keyExpressions: col 0
                     native: false
+                    vectorProcessingMode: MERGE_PARTIAL
                     projectedOutputColumns: []
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
@@ -548,9 +583,11 @@ STAGE PLANS:
               Group By Operator
                 Group By Vectorization:
                     className: VectorGroupByOperator
+                    groupByMode: HASH
                     vectorOutput: true
                     keyExpressions: col 0, col 5
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: []
                 keys: ctinyint (type: tinyint), cdouble (type: double)
                 mode: hash
@@ -602,9 +639,11 @@ STAGE PLANS:
               Group By Operator
                 Group By Vectorization:
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     keyExpressions: col 0, col 1
                     native: false
+                    vectorProcessingMode: MERGE_PARTIAL
                     projectedOutputColumns: []
                 keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                 mode: mergepartial
@@ -615,9 +654,11 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFCount(col 1) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: COMPLETE
                     vectorOutput: true
                     keyExpressions: col 0
                     native: false
+                    vectorProcessingMode: STREAMING
                     projectedOutputColumns: [0]
                 keys: _col0 (type: tinyint)
                 mode: complete
@@ -745,9 +786,11 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFSumLong(col 0) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: HASH
                     vectorOutput: true
                     keyExpressions: col 5
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0]
                 keys: cdouble (type: double)
                 mode: hash
@@ -801,9 +844,11 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFSumLong(col 1) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
                     vectorOutput: true
                     keyExpressions: col 0
                     native: false
+                    vectorProcessingMode: MERGE_PARTIAL
                     projectedOutputColumns: [0]
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
diff --git ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
index 6a99fc3..f068ad4 100644
--- ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
@@ -46,18 +46,20 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0)
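The new groupByMode/vectorProcessingMode pair recurs across all of these hunks: hash-mode operators print HASH, keyed merge stages print MERGE_PARTIAL, keyless merges print GLOBAL, and complete mode runs as STREAMING. One plausible way to read that dispatch, as a hypothetical sketch rather than VectorGroupByOperator's actual logic:

    // Hypothetical sketch of how the four labels could be chosen; the real
    // decision lives in Hive's vectorizer and considers more inputs.
    enum VectorProcessingModeSketch { GLOBAL, HASH, MERGE_PARTIAL, STREAMING }

    final class ProcessingModeSketch {
        static VectorProcessingModeSketch pick(boolean hasKeys, boolean inputIsPartials, boolean keysArriveSorted) {
            if (!hasKeys) return VectorProcessingModeSketch.GLOBAL;            // one buffer for the whole input
            if (keysArriveSorted) return VectorProcessingModeSketch.STREAMING; // flush aggregates on key change
            return inputIsPartials ? VectorProcessingModeSketch.MERGE_PARTIAL  // merge upstream partial results
                                   : VectorProcessingModeSketch.HASH;          // hash table of per-key buffers
        }

        public static void main(String[] args) {
            System.out.println(pick(true, false, false)); // HASH (map side)
            System.out.println(pick(true, true, false));  // MERGE_PARTIAL (reduce side, keyed)
            System.out.println(pick(false, true, false)); // GLOBAL (reduce side, no keys)
            System.out.println(pick(true, false, true));  // STREAMING (complete mode)
        }
    }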
diff --git ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 465f4ea..0057762 100644
--- ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -114,17 +114,22 @@ STAGE PLANS:
               Group By Operator
                 aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
                 Group By Vectorization:
-                    aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint
+                    aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkEmptyKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint)
             Execution mode: vectorized, llap
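Every native reduce sink in these plans lists the same nativeConditionsMet set, and the operator is only selected when all of them hold. A hypothetical checker with the same shape (illustrative, not Hive's vectorizer code):

    // Hypothetical helper mirroring the "nativeConditionsMet" lines: a native
    // vectorized reduce sink is only chosen when every prerequisite holds.
    final class NativeReduceSinkCheckSketch {
        static boolean canUseNativeReduceSink(boolean newReduceSinkEnabled,
                                              String executionEngine,
                                              boolean hasPtfTopN,
                                              boolean hasDistinctColumns,
                                              boolean keysUseBinarySortableSerDe,
                                              boolean valuesUseLazyBinarySerDe) {
            return newReduceSinkEnabled                 // hive.vectorized.execution.reducesink.new.enabled
                && ("tez".equals(executionEngine) || "spark".equals(executionEngine))
                && !hasPtfTopN                          // "No PTF TopN"
                && !hasDistinctColumns                  // "No DISTINCT columns"
                && keysUseBinarySortableSerDe
                && valuesUseLazyBinarySerDe;
        }

        public static void main(String[] args) {
            System.out.println(canUseNativeReduceSink(true, "tez", false, false, true, true)); // true
            System.out.println(canUseNativeReduceSink(true, "mr", false, false, true, true));  // false
        }
    }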
@@ -132,30 +137,48 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
+               Group By Vectorization:
+                   aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFStdPopFinal(col 2) -> double, VectorUDAFStdSampFinal(col 3) -> double, VectorUDAFVarSampFinal(col 4) -> double, VectorUDAFAvgFinal(col 5) -> double, VectorUDAFStdSampFinal(col 6) -> double, VectorUDAFMinLong(col 7) -> tinyint, VectorUDAFCountMerge(col 8) -> bigint
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25]
+                     selectExpressions: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12)(children: DoubleColUnaryMinus(col 10)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 13)(children: DoubleColUnaryMinus(col 12)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0) -> 12:double, DoubleColMultiplyDoubleColumn(col 16, col 13)(children: DoubleColMultiplyDoubleColumn(col 13, col 15)(children: DoubleColUnaryMinus(col 15)(children: DoubleColUnaryMinus(col 13)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15)(children: DoubleColUnaryMinus(col 13)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2) -> 13:double, DoubleColSubtractDoubleColumn(col 2, col 16)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 16)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18, col 2)(children: DoubleColSubtractDoubleColumn(col 2, col 16)(children: DoubleColUnaryMinus(col 18)(children: DoubleColUnaryMinus(col 16)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4) -> 18:double, DoubleColUnaryMinus(col 19)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19, val -563.0)(children: DoubleColUnaryMinus(col 2) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22)(children: DoubleColDivideDoubleScalar(col 19, val -563.0)(children: DoubleColUnaryMinus(col 2) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0, col 1) -> 22:double, DoubleColDivideDoubleColumn(col 23, col 25)(children: CastLongToDouble(col 7) -> 23:double, DoubleColDivideDoubleScalar(col 24, val -563.0)(children: DoubleColUnaryMinus(col 2) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23)(children: DoubleColDivideDoubleColumn(col 0, col 1) -> 23:double) -> 25:double
                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -355,17 +378,22 @@ STAGE PLANS:
               Group By Operator
                 aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
                 Group By Vectorization:
-                    aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct
+                    aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkEmptyKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct)
             Execution mode: vectorized, llap
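The selectExpressions entries above encode trees such as DoubleColUnaryMinus(col N)(children: ...), in which each child evaluates first into a scratch column and the parent then reads that column; the scratchColumnTypeNames lines earlier record those extra columns. A toy illustration of the evaluate-children-first pattern, using stand-in classes rather than Hive's VectorExpression API:

    final class ScratchColumnSketch {
        // Stand-in for a vectorized expression: children evaluate into
        // scratch columns first, then the parent consumes those columns.
        abstract static class Expr {
            final Expr[] children;
            Expr(Expr... children) { this.children = children; }
            final void evaluate(double[][] cols, int n) {
                for (Expr c : children) {
                    c.evaluate(cols, n); // children fill their outputs first
                }
                evaluateSelf(cols, n);
            }
            abstract void evaluateSelf(double[][] cols, int n);
        }

        static final class AddScalar extends Expr {
            final int in, out;
            final double val;
            AddScalar(int in, double val, int out) { this.in = in; this.val = val; this.out = out; }
            @Override void evaluateSelf(double[][] cols, int n) {
                for (int i = 0; i < n; i++) cols[out][i] = cols[in][i] + val;
            }
        }

        static final class Negate extends Expr {
            final int in, out;
            Negate(Expr child, int in, int out) { super(child); this.in = in; this.out = out; }
            @Override void evaluateSelf(double[][] cols, int n) {
                for (int i = 0; i < n; i++) cols[out][i] = -cols[in][i];
            }
        }

        public static void main(String[] args) {
            // Column 0 holds data; columns 1 and 2 are scratch space, the
            // analogue of the plan's "(- (_col0 + -3728.0))" shape: the child
            // writes column 1, the parent reads column 1 and writes column 2.
            double[][] cols = new double[3][2];
            cols[0][0] = 1.0;
            cols[0][1] = 2.0;
            Expr e = new Negate(new AddScalar(0, -3728.0, 1), 1, 2);
            e.evaluate(cols, 2);
            System.out.println(cols[2][0] + " " + cols[2][1]); // 3727.0 3726.0
        }
    }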
@@ -373,30 +401,48 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8)
+               Group By Vectorization:
+                   aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFVarPopFinal(col 1) -> double, VectorUDAFStdPopFinal(col 2) -> double, VectorUDAFMaxDouble(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFMinLong(col 5) -> int, VectorUDAFMinDouble(col 6) -> double, VectorUDAFStdSampFinal(col 7) -> double, VectorUDAFVarSampFinal(col 8) -> double
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 10, 11, 1, 13, 2, 14, 9, 15, 3, 4, 16, 5, 19, 17, 6, 18, 7, 20, 12, 21, 23, 8]
+                     selectExpressions: DoubleColDivideDoubleScalar(col 9, val -3728.0)(children: CastLongToDouble(col 0) -> 9:double) -> 10:double, LongColMultiplyLongScalar(col 0, val -3728) -> 11:long, LongColUnaryMinus(col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 13:long, LongScalarModuloLongColumn(val -563, col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 14:long, DoubleColDivideDoubleColumn(col 1, col 2) -> 9:double, DoubleColUnaryMinus(col 2) -> 15:double, DoubleColSubtractDoubleScalar(col 2, val 10.175) -> 16:double, DoubleColModuloDoubleColumn(col 17, col 18)(children: CastLongToDouble(col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 17:double, DoubleColSubtractDoubleScalar(col 2, val 10.175) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 3) -> 17:double, DoubleColModuloDoubleScalar(col 3, val -26.28) -> 18:double, DoubleColUnaryMinus(col 21)(children: DoubleColDivideDoubleScalar(col 20, val -3728.0)(children: CastLongToDouble(col 0) -> 20:double) -> 21:double) -> 20:double, LongColModuloLongColumn(col 22, col 23)(children: LongColUnaryMinus(col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 22:long, LongScalarModuloLongColumn(val -563, col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 23:long) -> 12:long, DoubleColSubtractDoubleColumn(col 24, col 4)(children: DoubleColDivideDoubleScalar(col 21, val -3728.0)(children: CastLongToDouble(col 0) -> 21:double) -> 24:double) -> 21:double, LongColUnaryMinus(col 22)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 22:long) -> 23:long
                  Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -479,7 +525,7 @@ WHERE (((cbigint <= 197)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16
+-20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 -58 5454.512308361625 1626869520 7.2647256545687792E16
 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
 SELECT VAR_POP(cbigint),
        (-(VAR_POP(cbigint))),
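The only data change in this file is the twentieth column of the row above, 511 before and -58 after. Plugging _col0 = -20301111 into ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) reproduces both values: 511 falls out when the intermediate products wrap to 32-bit int, -58 when the arithmetic stays in 64-bit longs, which is consistent with the expression now running in LongColumnVector lanes. A self-contained check:

    // Self-contained check of the changed value: both 511 (32-bit wrap) and
    // -58 (64-bit evaluation) fall out of the same expression.
    public final class OverflowCheck {
        public static void main(String[] args) {
            int col0 = -20301111;

            // int arithmetic: -20301111 * -3728 wraps to -1626869520, the
            // value the plan's third output column shows.
            int p32 = col0 * -3728;
            int r32 = (-p32) % (-563 % p32);
            System.out.println(p32 + " " + r32); // -1626869520 511

            // long arithmetic: no wrap; the same expression yields -58.
            long p64 = (long) col0 * -3728L;
            long r64 = (-p64) % (-563L % p64);
            System.out.println(p64 + " " + r64); // 75682541808 -58
        }
    }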
@@ -588,17 +634,22 @@ STAGE PLANS:
               Group By Operator
                 aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
                 Group By Vectorization:
-                    aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct
+                    aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkEmptyKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct)
             Execution mode: vectorized, llap
@@ -606,30 +657,48 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7)
+               Group By Vectorization:
+                   aggregators: VectorUDAFVarPopFinal(col 0) -> double, VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxLong(col 2) -> tinyint, VectorUDAFStdPopFinal(col 3) -> double, VectorUDAFMaxLong(col 4) -> int, VectorUDAFStdSampFinal(col 5) -> double, VectorUDAFCountMerge(col 6) -> bigint, VectorUDAFAvgFinal(col 7) -> double
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 8, 10, 1, 12, 2, 14, 13, 15, 1, 16, 3, 9, 19, 4, 18, 22, 5, 23, 6, 7, 24]
+                     selectExpressions: DoubleColUnaryMinus(col 0) -> 8:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 10:double, DecimalColModuloDecimalScalar(col 11, val 79.553)(children: CastLongToDecimal(col 1) -> 11:decimal(19,0)) -> 12:decimal(5,3), DoubleColSubtractDoubleColumn(col 9, col 13)(children: CastLongToDouble(col 1) -> 9:double, DoubleColUnaryMinus(col 0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 13:double, DoubleScalarModuloDoubleColumn(val -1.0, col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 15:double, LongColUnaryMinus(col 1) -> 16:long, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 17:double) -> 9:double, LongScalarMultiplyLongColumn(val 762, col 18)(children: LongColUnaryMinus(col 1) -> 18:long) -> 19:long, LongColAddLongColumn(col 2, col 20)(children: col 2, LongScalarMultiplyLongColumn(val 762, col 18)(children: LongColUnaryMinus(col 1) -> 18:long) -> 20:long) -> 18:long, DoubleColAddDoubleColumn(col 17, col 21)(children: DoubleColUnaryMinus(col 0) -> 17:double, CastLongToDouble(col 4) -> 21:double) -> 22:double, LongColModuloLongColumn(col 20, col 1)(children: LongColUnaryMinus(col 1) -> 20:long) -> 23:long, LongScalarModuloLongColumn(val -3728, col 20)(children: LongColAddLongColumn(col 2, col 24)(children: col 2, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColUnaryMinus(col 1) -> 20:long) -> 24:long) -> 20:long) -> 24:long
                  Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
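The var_pop/stddev family finals above (VectorUDAFVarPopFinal and friends) reduce a struct-typed partial to a double. The struct fields are printed only as "struct" in this diff, so the layout below is an assumption: the usual (count, sum, m2) representation where m2 accumulates squared deviations. An illustrative sketch, not Hive's classes:

    final class VarianceFinalSketch {
        // Assumed partial layout: (count, sum, m2); m2 holds the running sum
        // of squared deviations from the mean, the streaming-variance form.
        static final class VarPartial {
            long count;
            double sum;
            double m2;
        }

        static double varPop(VarPartial p)  { return p.m2 / p.count; }       // requires count >= 1
        static double varSamp(VarPartial p) { return p.m2 / (p.count - 1); } // requires count >= 2
        static double stdPop(VarPartial p)  { return Math.sqrt(varPop(p)); }
        static double stdSamp(VarPartial p) { return Math.sqrt(varSamp(p)); }

        public static void main(String[] args) {
            VarPartial p = new VarPartial();
            p.count = 4;
            p.sum = 10.0;
            p.m2 = 5.0;
            System.out.println(varPop(p)); // 1.25
            System.out.println(stdPop(p)); // ~1.118
        }
    }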
@@ -800,17 +869,22 @@ STAGE PLANS:
               Group By Operator
                 aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
                 Group By Vectorization:
-                    aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float
+                    aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order:
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkEmptyKeyOperator
+                      native: true
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                   Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float)
             Execution mode: vectorized, llap
@@ -818,30 +892,48 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5)
+               Group By Vectorization:
+                   aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFStdSampFinal(col 2) -> double, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFVarPopFinal(col 4) -> double, VectorUDAFMaxDouble(col 5) -> float
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 6, 8, 1, 7, 10, 2, 9, 3, 4, 12, 14, 5, 11]
+                     selectExpressions: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 6:double, DoubleColAddDoubleColumn(col 7, col 0)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 7:double) -> 8:double, DoubleColDivideDoubleColumn(col 9, col 0)(children: DoubleColAddDoubleColumn(col 7, col 0)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 7:double) -> 9:double) -> 7:double, DoubleColUnaryMinus(col 9)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 9:double) -> 10:double, DoubleColModuloDoubleColumn(col 0, col 11)(children: DoubleColUnaryMinus(col 9)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 9:double) -> 11:double) -> 9:double, LongColUnaryMinus(col 1) -> 12:long, DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 13)(children: LongColUnaryMinus(col 1) -> 13:long) -> 11:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 4, val -26.28) -> 11:double
                  Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2119,11 +2211,12 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     keyExpressions: col 1
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                 keys: csmallint (type: smallint)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -2132,6 +2225,10 @@ STAGE PLANS:
                 key expressions: _col0 (type: smallint)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: smallint)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint)
             Execution mode: vectorized, llap
@@ -2139,21 +2236,32 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3)
+               Group By Vectorization:
+                   aggregators: VectorUDAFStdSampFinal(col 1) -> double, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFCountMerge(col 4) -> bigint
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   vectorProcessingMode: MERGE_PARTIAL
+                   projectedOutputColumns: [0, 1, 2, 3]
                keys: KEY._col0 (type: smallint)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -2161,10 +2269,19 @@ STAGE PLANS:
                Select Operator
                  expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13]
+                     selectExpressions: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 5:long, DecimalScalarDivideDecimalColumn(val -1.389, col 6)(children: CastLongToDecimal(col 0) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9, col 10)(children: CastLongToDouble(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 9:double, CastLongToDouble(col 2) -> 10:double) -> 11:double, LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 12:long, LongColUnaryMinus(col 13)(children: LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 13:long) -> 8:long, LongColSubtractLongScalar(col 4, val -89010) -> 13:long
                  Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
                    sort order: +++++++++++
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkObjectHashOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
         Reducer 3
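Before the final step, the MERGE_PARTIAL stages above must combine per-key partials shipped by the mappers. For the variance family that is the standard parallel-variance merge (Chan et al.); a self-contained sketch under the same assumed (count, sum, m2) layout as before, not Hive's actual code:

    final class VarianceMergeSketch {
        long count;
        double sum;
        double m2;

        // Fold another (count, sum, m2) partial into this one.
        void merge(long otherCount, double otherSum, double otherM2) {
            if (otherCount == 0) return;
            if (count == 0) { count = otherCount; sum = otherSum; m2 = otherM2; return; }
            double delta = otherSum / otherCount - sum / count;
            m2 += otherM2 + delta * delta * ((double) count * otherCount) / (count + otherCount);
            count += otherCount;
            sum += otherSum;
        }

        public static void main(String[] args) {
            VarianceMergeSketch v = new VarianceMergeSketch();
            v.merge(2, 3.0, 0.5);               // first mapper's partial: values {1, 2}
            v.merge(3, 9.0, 2.0);               // second mapper's partial: values {2, 3, 4}
            System.out.println(v.m2 / v.count); // merged var_pop = 1.04
        }
    }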
@@ -2374,11 +2491,12 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     keyExpressions: col 5
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false
                 keys: cdouble (type: double)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2387,6 +2505,10 @@ STAGE PLANS:
                 key expressions: _col0 (type: double)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: double)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkMultiKeyOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double)
             Execution mode: vectorized, llap
@@ -2394,21 +2516,32 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5)
+               Group By Vectorization:
+                   aggregators: VectorUDAFVarSampFinal(col 1) -> double, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFSumDouble(col 3) -> double, VectorUDAFVarPopFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double, VectorUDAFSumDouble(col 6) -> double
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   vectorProcessingMode: MERGE_PARTIAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                keys: KEY._col0 (type: double)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -2416,10 +2549,19 @@ STAGE PLANS:
                Select Operator
                  expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15]
+                     selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 7:double, DoubleColUnaryMinus(col 1) -> 8:double, DoubleColAddDoubleScalar(col 9, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9, col 12)(children: DoubleColUnaryMinus(col 1) -> 9:double, DoubleColAddDoubleScalar(col 11, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 1) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0, col 1) -> 9:double, DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14)(children: DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 14:double) -> 15:double
                  Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: double)
                    sort order: +
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkObjectHashOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
         Reducer 3
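Note how the reduce-sink class tracks the key shape in these hunks: the smallint key earlier got VectorReduceSinkLongOperator, the single double key here gets VectorReduceSinkMultiKeyOperator, keyless sinks get VectorReduceSinkEmptyKeyOperator, and the tinyint key in vectorization_limit.q.out picked VectorReduceSinkObjectHashOperator, so the real selection weighs more than the type alone. A hypothetical classifier, only to show the shape of that dispatch:

    final class ReduceSinkKeyDispatchSketch {
        enum SinkKind { EMPTY_KEY, LONG_KEY, STRING_KEY, MULTI_KEY }

        static SinkKind classify(String[] keyTypes) {
            if (keyTypes.length == 0) return SinkKind.EMPTY_KEY;
            if (keyTypes.length > 1) return SinkKind.MULTI_KEY;
            switch (keyTypes[0]) {
                case "boolean": case "tinyint": case "smallint": case "int": case "bigint":
                    return SinkKind.LONG_KEY;  // integer family rides in long vectors
                case "string":
                    return SinkKind.STRING_KEY;
                default:
                    return SinkKind.MULTI_KEY; // e.g. a lone double key, as in this hunk
            }
        }

        public static void main(String[] args) {
            System.out.println(classify(new String[] {"smallint"}));            // LONG_KEY
            System.out.println(classify(new String[] {"double"}));              // MULTI_KEY
            System.out.println(classify(new String[] {}));                      // EMPTY_KEY
            System.out.println(classify(new String[] {"timestamp", "string"})); // MULTI_KEY
        }
    }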
@@ -2671,13 +2813,14 @@ STAGE PLANS:
               Group By Operator
                 aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint)
                 Group By Vectorization:
-                    aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint
+                    aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     keyExpressions: col 8, col 6
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                 keys: ctimestamp1 (type: timestamp), cstring1 (type: string)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2686,6 +2829,10 @@ STAGE PLANS:
                 key expressions: _col0 (type: timestamp), _col1 (type: string)
                 sort order: ++
                 Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkMultiKeyOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 Statistics: Num rows: 6144 Data size: 5199016 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint)
             Execution mode: vectorized, llap
@@ -2693,21 +2840,32 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12)
+               Group By Vectorization:
+                   aggregators: VectorUDAFStdPopFinal(col 2) -> double, VectorUDAFAvgFinal(col 3) -> double, VectorUDAFCountMerge(col 4) -> bigint, VectorUDAFMinLong(col 5) -> tinyint, VectorUDAFVarSampFinal(col 6) -> double, VectorUDAFVarPopFinal(col 7) -> double, VectorUDAFAvgFinal(col 8) -> double, VectorUDAFVarSampFinal(col 9) -> double, VectorUDAFAvgFinal(col 10) -> double, VectorUDAFMinDouble(col 11) -> double, VectorUDAFVarPopFinal(col 12) -> double, VectorUDAFStdPopFinal(col 13) -> double, VectorUDAFSumLong(col 14) -> bigint
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   keyExpressions: col 0, col 1
+                   native: false
+                   vectorProcessingMode: MERGE_PARTIAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
                keys: KEY._col0 (type: timestamp), KEY._col1 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2715,10 +2873,19 @@ STAGE PLANS:
                Select Operator
                  expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0, 1, 2, 15, 16, 3, 17, 18, 4, 19, 22, 5, 21, 23, 6, 20, 26, 27, 7, 25, 8, 9, 29, 28, 10, 30, 32, 24, 11, 12, 31, 34, 37, 13, 14, 38, 40, 4, 39]
+                     selectExpressions: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 15:double, DoubleColUnaryMinus(col 2) -> 16:double, DoubleColUnaryMinus(col 2) -> 17:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 18:double, LongColUnaryMinus(col 4) -> 19:long, DoubleColMultiplyDoubleColumn(col 20, col 21)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 23, col 20)(children: DoubleColMultiplyDoubleColumn(col 20, col 21)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 21:double) -> 23:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 20)(children: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 20:double) -> 23:double, DoubleColAddDoubleColumn(col 6, col 25)(children: DoubleColMultiplyDoubleColumn(col 26, col 20)(children: DoubleColMultiplyDoubleColumn(col 20, col 25)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 25:double) -> 26:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25)(children: DoubleColUnaryMinus(col 2) -> 25:double) -> 26:double, DoubleColDivideDoubleColumn(col 25, col 2)(children: CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 25:double) -> 27:double, DoubleScalarDivideDoubleColumn(val 10.175, col 3) -> 25:double, DoubleColSubtractDoubleColumn(col 28, col 30)(children: DoubleColAddDoubleColumn(col 6, col 29)(children: DoubleColMultiplyDoubleColumn(col 30, col 28)(children: DoubleColMultiplyDoubleColumn(col 28, col 29)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 28:double, DoubleColUnaryMinus(col 2) -> 29:double) -> 30:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 28:double) -> 29:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 31, col 29)(children: DoubleColMultiplyDoubleColumn(col 29, col 30)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 29:double, DoubleColUnaryMinus(col 2) -> 30:double) -> 31:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 29:double) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 30)(children: DoubleColUnaryMinus(col 28)(children: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 28:double) -> 30:double) -> 28:double, DoubleColMultiplyDoubleScalar(col 31, val 10.175)(children: DoubleColSubtractDoubleColumn(col 30, col 32)(children: DoubleColAddDoubleColumn(col 6, col 31)(children: DoubleColMultiplyDoubleColumn(col 32, col 30)(children: DoubleColMultiplyDoubleColumn(col 30, col 31)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 30:double, DoubleColUnaryMinus(col 2) -> 31:double) -> 32:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 30:double) -> 31:double) -> 30:double, DoubleColMultiplyDoubleColumn(col 33, col 31)(children: DoubleColMultiplyDoubleColumn(col 31, col 32)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 31:double, DoubleColUnaryMinus(col 2) -> 32:double) -> 33:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 31:double) -> 32:double) -> 31:double) -> 30:double, DoubleScalarModuloDoubleColumn(val 10.175, col 31)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 3) -> 31:double) -> 32:double, LongColUnaryMinus(col 5) -> 24:long, DoubleColUnaryMinus(col 34)(children: DoubleColMultiplyDoubleColumn(col 31, col 33)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 31:double, DoubleColUnaryMinus(col 2) -> 33:double) -> 34:double) -> 31:double, DoubleColModuloDoubleColumn(col 33, col 10)(children: DoubleColUnaryMinus(col 2) -> 33:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -26.28, col 36)(children: CastLongToDecimal(col 35)(children: LongColUnaryMinus(col 5) -> 35:long) -> 36:decimal(3,0)) -> 37:decimal(8,6), DoubleColDivideDoubleColumn(col 33, col 7)(children: DoubleColAddDoubleColumn(col 6, col 38)(children: DoubleColMultiplyDoubleColumn(col 39, col 33)(children: DoubleColMultiplyDoubleColumn(col 33, col 38)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 33:double, DoubleColUnaryMinus(col 2) -> 38:double) -> 39:double, CastLongToDouble(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 33:double) -> 38:double) -> 33:double) -> 38:double, LongColUnaryMinus(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 40:long, DoubleColModuloDoubleScalar(col 33, val -26.28)(children: DoubleColAddDoubleColumn(col 6, col 39)(children: DoubleColMultiplyDoubleColumn(col 41, col 33)(children: DoubleColMultiplyDoubleColumn(col 33, col 39)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 33:double, DoubleColUnaryMinus(col 2) -> 39:double) -> 41:double, CastLongToDouble(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 33:double) -> 39:double) -> 33:double) -> 39:double
                  Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double)
                    sort order: +++++++++++++++++++++++++++++++++++++++
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkObjectHashOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                    Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE
                    TopN Hash Memory Usage: 0.1
         Reducer 3
@@ -3051,13 +3218,14 @@ STAGE PLANS:
               Group By Operator
                 aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint)
                 Group By Vectorization:
-                    aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct
+                    aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     keyExpressions: col 10
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false
                 keys: cboolean1 (type: boolean)
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -3066,6 +3234,10 @@ STAGE PLANS:
                 key expressions: _col0 (type: boolean)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: boolean)
+                Reduce Sink Vectorization:
+                    className: VectorReduceSinkLongOperator
+                    native: true
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 Statistics: Num rows: 3 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct)
             Execution mode: vectorized, llap
@@ -3073,21 +3245,32 @@ STAGE PLANS:
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-               groupByVectorOutput: false
+               groupByVectorOutput: true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported
-               vectorized: false
+               groupByVectorOutput: true
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9)
+               Group By Vectorization:
+                   aggregators: VectorUDAFMaxDouble(col 1) -> float, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFVarSampFinal(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFMinLong(col 5) -> bigint, VectorUDAFVarPopFinal(col 6) -> double, VectorUDAFSumLong(col 7) -> bigint, VectorUDAFStdSampFinal(col 8) -> double, VectorUDAFStdPopFinal(col 9) -> double, VectorUDAFAvgFinal(col 10) -> double
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   vectorOutput: true
+                   keyExpressions: col 0
+                   native: false
+                   vectorProcessingMode: MERGE_PARTIAL
+                   projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
                keys: KEY._col0 (type: boolean)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -3095,10 +3278,19 @@ STAGE PLANS:
                Select Operator
                  expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type:
double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 11, 12, 2, 14, 3, 15, 17, 4, 19, 5, 6, 16, 20, 22, 7, 8, 23, 26, 9, 28, 10, 21, 30] + selectExpressions: DoubleColUnaryMinus(col 1) -> 11:double, DoubleScalarDivideDoubleColumn(val -26.28, col 1)(children: col 1) -> 12:double, DecimalColSubtractDecimalScalar(col 13, val 10.175)(children: CastLongToDecimal(col 2) -> 13:decimal(19,0)) -> 14:decimal(23,3), DoubleColModuloDoubleColumn(col 3, col 1)(children: col 1) -> 15:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16)(children: DoubleColUnaryMinus(col 1) -> 16:double) -> 17:double, DoubleColAddDoubleColumn(col 16, col 3)(children: CastDecimalToDouble(col 18)(children: DecimalColSubtractDecimalScalar(col 13, val 10.175)(children: CastLongToDecimal(col 2) -> 13:decimal(19,0)) -> 18:decimal(23,3)) -> 16:double) -> 19:double, DoubleColUnaryMinus(col 20)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16)(children: DoubleColUnaryMinus(col 1) -> 16:double) -> 20:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 79.553, col 6) -> 20:double, DoubleColModuloDoubleColumn(col 3, col 21)(children: DoubleScalarDivideDoubleColumn(val 79.553, col 6) -> 21:double) -> 22:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 13)(children: CastLongToDecimal(col 5) -> 13:decimal(19,0)) -> 23:decimal(24,3), DecimalColSubtractDecimalColumn(col 13, col 25)(children: CastLongToDecimal(col 7) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24)(children: CastLongToDecimal(col 5) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 26:decimal(25,3), FuncNegateDecimalToDecimal(col 27)(children: DecimalColSubtractDecimalColumn(col 13, col 25)(children: CastLongToDecimal(col 7) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24)(children: CastLongToDecimal(col 5) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 27:decimal(25,3)) -> 28:decimal(25,3), DoubleColUnaryMinus(col 10) -> 21:double, DoubleColMultiplyDoubleColumn(col 10, col 29)(children: CastLongToDouble(col 7) -> 29:double) -> 30:double Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: float), _col2 (type: float), 
_col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 @@ -3271,8 +3463,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -3310,8 +3504,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -3385,8 +3581,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -3424,8 +3622,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -3571,8 +3771,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -3610,8 +3812,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -3685,8 +3889,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -3724,8 +3930,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -3799,8 +4007,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 2) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -3838,8 +4048,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -3913,8 +4125,10 @@ STAGE PLANS: Group By 
Vectorization: aggregators: VectorUDAFCount(col 4) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -3952,8 +4166,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -4027,8 +4243,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 6) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -4066,8 +4284,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -4141,8 +4361,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 10) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -4180,8 +4402,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out index 0beaba8..80add93 100644 --- ql/src/test/results/clientpositive/llap/vectorized_case.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out @@ -290,8 +290,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -329,8 +331,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 @@ -417,8 +421,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -456,8 +462,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out index 
59badfb..39e8096 100644 --- ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_date_funcs.q.out @@ -1261,8 +1261,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1300,8 +1302,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 1) -> date, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out index cf2db94..c3e5f7c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out @@ -16,9 +16,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain vectorization detail +select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization detail +select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -41,22 +43,51 @@ STAGE PLANS: TableScan alias: dtest Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: a (type: int) outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: FINAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: STREAMING + projectedOutputColumns: [] keys: a (type: int) mode: final outputColumnNames: _col0 Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col0), count(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1] Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -69,23 +100,45 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: a:int, b:int + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFCountMerge(col 1) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -107,9 +160,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -133,11 +188,26 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 2 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [] keys: cint (type: int) mode: hash outputColumnNames: _col0 @@ -146,6 +216,12 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: [0] + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -157,45 +233,97 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:int + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), avg(_col0), std(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFStdPopLong(col 0) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:struct, 
VALUE._col3:struct + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2), std(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFAvgFinal(col 2) -> double, VectorUDAFStdPopFinal(col 3) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out index c9e3e60..9a1c44c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out @@ -136,8 +136,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -173,8 +175,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -198,8 +202,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -229,8 +235,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -373,8 +381,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -410,8 +420,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -435,8 +447,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: 
mergepartial outputColumnNames: _col0 @@ -466,8 +480,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -610,8 +626,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -647,8 +665,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -672,8 +692,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -703,8 +725,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -848,8 +872,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -918,8 +944,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -957,8 +985,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -982,8 +1012,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1013,8 +1045,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -1042,8 +1076,10 @@ STAGE PLANS: Group By Vectorization: aggregators: 
VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -1187,8 +1223,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 0) -> string, VectorUDAFBloomFilter(col 0) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1214,8 +1252,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1251,8 +1291,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1276,8 +1318,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1307,8 +1351,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinString(col 0) -> string, VectorUDAFMaxString(col 1) -> string, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -1336,8 +1382,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 @@ -1480,8 +1528,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 1) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilter(col 1) -> binary className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] mode: hash outputColumnNames: _col0, _col1, _col2 @@ -1517,8 +1567,10 @@ STAGE PLANS: Group By Operator aggregations: count() Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -1542,8 +1594,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1573,8 +1627,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> int, VectorUDAFMaxLong(col 1) -> int, VectorUDAFBloomFilterMerge(col 2) -> binary className: 
VectorGroupByOperator + groupByMode: FINAL vectorOutput: true native: false + vectorProcessingMode: STREAMING projectedOutputColumns: [0, 1, 2] mode: final outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 0f02856..e56800a 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -72,17 +72,22 @@ STAGE PLANS: Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: - aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct + aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 12) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized, llap @@ -90,7 +95,7 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -138,20 +143,33 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgFinal(col 3) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out index 2769e66..26c377f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin2.q.out @@ -92,8 +92,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 1:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -173,8 +175,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 13eae75..e904286 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -163,18 +163,20 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 6cd31db..cdf6b3d 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -289,13 +289,14 @@ STAGE PLANS: Group By Operator aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCount(col 5) -> bigint, VectorUDAFAvgDouble(col 3) -> struct, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFMaxDecimal(col 10) -> decimal(4,2) + aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCount(col 5) -> bigint, VectorUDAFAvgDouble(col 3) -> struct, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFMaxDecimal(col 10) -> decimal(4,2) className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true 
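
Several reducers in the plans above switch from carrying struct-typed AVG partials to reporting VectorUDAFAvgFinal(col N) -> double, i.e. the final step now reduces the (count, sum) partial to a primitive. A minimal sketch of what that finalization computes — names hypothetical, not Hive's implementation:

    import java.util.OptionalDouble;

    public final class AvgFinalSketch {
        /** Hypothetical stand-in for the (count, sum) struct of a partial AVG. */
        record AvgPartial(long count, double sum) {}

        /** Merge partials from upstream tasks, then finalize to a double. */
        static OptionalDouble finalizeAvg(AvgPartial... partials) {
            long count = 0;
            double sum = 0.0;
            for (AvgPartial p : partials) {
                count += p.count();
                sum += p.sum();
            }
            // SQL AVG over zero rows is NULL; modeled here as an empty OptionalDouble.
            return count == 0 ? OptionalDouble.empty() : OptionalDouble.of(sum / count);
        }

        public static void main(String[] args) {
            // Two partials totaling 4 rows with sum 40.0 -> average 10.0.
            System.out.println(finalizeAvg(new AvgPartial(3, 30.0), new AvgPartial(1, 10.0)));
        }
    }

Because the finalized value is a primitive double rather than a STRUCT, the merge-partial GROUP BY can emit vectorizable output, which is why the "output type STRUCT requires PRIMITIVE" conditions disappear from these plans.
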
keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -304,6 +305,10 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) Execution mode: vectorized, llap @@ -311,21 +316,32 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double, VectorUDAFMaxDecimal(col 6) -> decimal(4,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: tinyint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -333,6 +349,10 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) Reducer 3 diff --git 
ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index 5c849f6..0a6d87a 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -3365,9 +3365,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1, col 2 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial @@ -3504,9 +3506,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumDouble(col 7) -> double className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: p_mfgr (type: string), p_brand (type: string) mode: hash diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 3c972cc..a750d9f 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -126,8 +126,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -137,21 +139,35 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgFinal(col 3) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) 
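
The shuffles above are served by three specialized sinks: VectorReduceSinkEmptyKeyOperator, VectorReduceSinkLongOperator, and VectorReduceSinkObjectHashOperator. The selection rule sketched below is inferred from the explain output only — a keyless sink for global aggregates, the Long sink for a single integer-family key that is also the partition key, ObjectHash otherwise — and is not Hive's actual selection logic:

    import java.util.List;

    public final class ReduceSinkChoiceSketch {
        enum SinkClass { EMPTY_KEY, LONG_KEY, OBJECT_HASH }

        static SinkClass choose(List<String> keyTypes, boolean partitionedByKey) {
            if (keyTypes.isEmpty()) {
                return SinkClass.EMPTY_KEY;   // keyless sinks feeding global aggregates
            }
            if (partitionedByKey && keyTypes.size() == 1 && isLongFamily(keyTypes.get(0))) {
                return SinkClass.LONG_KEY;    // e.g. a hash-partitioned boolean or int key
            }
            return SinkClass.OBJECT_HASH;     // multi-column keys and order-by style shuffles
        }

        private static boolean isLongFamily(String type) {
            return switch (type) {
                case "boolean", "tinyint", "smallint", "int", "bigint", "date" -> true;
                default -> false;
            };
        }

        public static void main(String[] args) {
            System.out.println(choose(List.of(), false));          // EMPTY_KEY
            System.out.println(choose(List.of("int"), true));      // LONG_KEY
            System.out.println(choose(List.of("tinyint"), false)); // OBJECT_HASH (plain ordered shuffle)
        }
    }

This matches the pattern visible above, where the same tinyint key gets the Long sink when it is the partition column but ObjectHash when it only drives an ordered shuffle.
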
Reducer 4 diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 82d5518..24f8d36 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -17,24 +17,49 @@ POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000') POSTHOOK: type: QUERY POSTHOOK: Output: default@test POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test POSTHOOK: type: QUERY -Plan optimized by CBO. - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_2] - Select Operator [SEL_1] (rows=2 width=40) - Output:["_col0"] - TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: SELECT ts FROM test PREHOOK: type: QUERY @@ -46,36 +71,6 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test -POSTHOOK: type: QUERY -Plan optimized by CBO. 
- -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 llap - File Output Operator [FS_6] - Select Operator [SEL_5] (rows=1 width=80) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_4] (rows=1 width=80) - Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_3] - Group By Operator [GBY_2] (rows=1 width=80) - Output:["_col0","_col1"],aggregations:["min(ts)","max(ts)"] - Select Operator [SEL_1] (rows=2 width=40) - Output:["ts"] - TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] - PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY PREHOOK: Input: default@test @@ -85,27 +80,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') -POSTHOOK: type: QUERY -Plan optimized by CBO. - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 llap - File Output Operator [FS_3] - Select Operator [SEL_2] (rows=1 width=40) - Output:["_col0"] - Filter Operator [FIL_4] (rows=1 width=40) - predicate:(ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) - TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] - PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY PREHOOK: Input: default@test @@ -115,25 +89,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -SELECT ts FROM test -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION -SELECT ts FROM test -POSTHOOK: type: QUERY -Plan optimized by CBO. - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_4] - Select Operator [SEL_3] (rows=2 width=40) - Output:["_col0"] - TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] - PREHOOK: query: SELECT ts FROM test PREHOOK: type: QUERY PREHOOK: Input: default@test @@ -144,35 +99,136 @@ POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test POSTHOOK: type: QUERY -Plan optimized by CBO. 
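
The MAX(ts) - MIN(ts) result above prints as "3652060 23:59:59.999999999", a day-time interval. A self-contained sketch of that formatting using java.time; note that java.time's proleptic Gregorian calendar yields 3652058 days for the same endpoints, and the two-day gap likely reflects the legacy hybrid Julian/Gregorian calendar in use when these results were generated:

    import java.time.Duration;
    import java.time.LocalDateTime;

    public final class IntervalDayTimeSketch {
        /** Format a timestamp difference in the "days hh:mm:ss.nnnnnnnnn" shape above. */
        static String intervalDayTime(LocalDateTime from, LocalDateTime to) {
            Duration d = Duration.between(from, to);
            return String.format("%d %02d:%02d:%02d.%09d",
                    d.toDays(), d.toHoursPart(), d.toMinutesPart(),
                    d.toSecondsPart(), d.toNanosPart());
        }

        public static void main(String[] args) {
            LocalDateTime min = LocalDateTime.parse("0001-01-01T00:00:00");
            LocalDateTime max = LocalDateTime.parse("9999-12-31T23:59:59.999999999");
            // Prints "3652058 23:59:59.999999999" under the proleptic Gregorian calendar.
            System.out.println(intervalDayTime(min, max));
        }
    }
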
- -Vertex dependency in root stage -Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Reducer 2 vectorized, llap - File Output Operator [FS_12] - Select Operator [SEL_11] (rows=1 width=80) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_10] (rows=1 width=80) - Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized, llap - PARTITION_ONLY_SHUFFLE [RS_9] - Group By Operator [GBY_8] (rows=1 width=80) - Output:["_col0","_col1"],aggregations:["min(ts)","max(ts)"] - Select Operator [SEL_7] (rows=2 width=40) - Output:["ts"] - TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ts), max(ts) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1] + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1] + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: ts:timestamp + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: VALUE._col0:timestamp, VALUE._col1:timestamp + partitionColumnCount: 0 + Reduce 
Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1) + Group By Vectorization: + aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1] + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + selectExpressions: TimestampColSubtractTimestampColumn(col 1, col 0) -> 2:interval_day_time + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY @@ -183,26 +239,79 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') POSTHOOK: type: QUERY -Plan optimized by CBO. 
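
[Editor's note -- illustration only, not part of the patch. The new q.out lines in this diff annotate each VectorGroupByOperator with both a groupByMode and a vectorProcessingMode; from the plans above and the keyed plans later in the diff, HASH pairs with HASH on the map side, while MERGEPARTIAL pairs with GLOBAL when there are no grouping keys and with MERGE_PARTIAL when keyed partials still have to be combined per group. A sketch of that observed pairing; the class and method are hypothetical, and the mapping is inferred from these golden files only, not from Hive source.]

public class ProcessingModeSketch {

  enum VectorProcessingMode { HASH, MERGE_PARTIAL, GLOBAL }

  // Inferred from the golden-file annotations in this diff only.
  static VectorProcessingMode forGroupBy(String groupByMode, boolean hasKeys) {
    switch (groupByMode) {
      case "HASH":
        return VectorProcessingMode.HASH;                   // map-side hash aggregation
      case "MERGEPARTIAL":
        return hasKeys ? VectorProcessingMode.MERGE_PARTIAL // keyed merge of partials
                       : VectorProcessingMode.GLOBAL;       // single global result row
      default:
        throw new IllegalArgumentException("unexpected groupByMode: " + groupByMode);
    }
  }

  public static void main(String[] args) {
    System.out.println(forGroupBy("HASH", false));         // HASH
    System.out.println(forGroupBy("MERGEPARTIAL", false)); // GLOBAL, as in the MIN/MAX plans
    System.out.println(forGroupBy("MERGEPARTIAL", true));  // MERGE_PARTIAL, as in keyed plans
  }
}
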
- -Stage-0 - Fetch Operator - limit:-1 - Stage-1 - Map 1 vectorized, llap - File Output Operator [FS_7] - Select Operator [SEL_6] (rows=1 width=40) - Output:["_col0"] - Filter Operator [FIL_5] (rows=1 width=40) - predicate:(ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) - TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterTimestampColumnInList(col 0, values [0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0]) -> boolean + predicate: (ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) (type: boolean) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: ts:timestamp + partitionColumnCount: 0 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY @@ -213,3 +322,274 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test #### A masked pattern was here #### 0001-01-01 00:00:00 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + 
Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(ts) + Group By Vectorization: + aggregators: VectorUDAFAvgTimestamp(col 0) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: ts:timestamp + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), CAST( _col0 AS TIMESTAMP) (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1] + selectExpressions: CastDoubleToTimestamp(col 0) -> 1:timestamp + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test 
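
[Editor's note -- illustration only, not part of the generated q.out. In the AVG plan above, the map side ships the partial state of avg(ts) as a struct value (value expressions: _col0 (type: struct)) and the reducer's VectorUDAFAvgFinal collapses it to a double. A minimal sketch of that two-phase shape, under the textbook assumption that the struct carries a row count and a running sum; all names below are hypothetical, not Hive's classes.]

public class AvgTwoPhaseSketch {

  // Stand-in for the struct-typed partial result seen in the plan above.
  static final class AvgPartial {
    long count;
    double sum;
  }

  // Map-side HASH pass: fold one input value into a group's buffer.
  static void iterate(AvgPartial buf, double value) {
    buf.count++;
    buf.sum += value;
  }

  // Reduce-side MERGEPARTIAL pass: combine a shipped partial into the global buffer.
  static void merge(AvgPartial into, AvgPartial from) {
    into.count += from.count;
    into.sum += from.sum;
  }

  // Final step, as VectorUDAFAvgFinal does above: emit the average as a double.
  static double terminate(AvgPartial buf) {
    return buf.count == 0 ? Double.NaN : buf.sum / buf.count;
  }

  public static void main(String[] args) {
    AvgPartial map1 = new AvgPartial();
    AvgPartial map2 = new AvgPartial();
    iterate(map1, 1.0);
    iterate(map1, 2.0);
    iterate(map2, 3.0);
    AvgPartial global = new AvgPartial();
    merge(global, map1);
    merge(global, map2);
    System.out.println(terminate(global)); // 2.0
  }
}
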
+#### A masked pattern was here #### +POSTHOOK: query: SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +9.56332944E10 5000-07-01 13:00:00 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: ts + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) + Group By Vectorization: + aggregators: VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: ts:timestamp + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: VALUE._col0:struct, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:struct, VALUE._col5:struct, VALUE._col6:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: variance(VALUE._col0), var_pop(VALUE._col1), var_samp(VALUE._col2), std(VALUE._col3), stddev(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFVarPopFinal(col 0) -> double, VectorUDAFVarPopFinal(col 1) -> double, VectorUDAFVarSampFinal(col 2) -> double, VectorUDAFStdPopFinal(col 3) -> double, VectorUDAFStdPopFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double, VectorUDAFStdSampFinal(col 6) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test +PREHOOK: type: QUERY +PREHOOK: Input: default@test +#### A masked pattern was here #### +POSTHOOK: query: SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test +#### A masked pattern was here #### +2.489106846793884E22 2.489106846793884E22 4.978213693587768E22 1.577690352E11 1.577690352E11 1.577690352E11 2.2311910930235822E11 diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index e326f5f..f6dcb7c 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -809,8 +809,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -848,8 +850,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1, 2, 3] mode: 
mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -919,27 +923,48 @@ STAGE PLANS: TableScan alias: alltypesorc_string Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(ctimestamp1) + Group By Vectorization: + aggregators: VectorUDAFSumTimestamp(col 0) -> double + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1) - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -955,8 +980,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumDouble(col 0) -> double className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1057,17 +1084,22 @@ STAGE PLANS: Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) Group By Vectorization: - aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct + aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true 
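
[Editor's note -- illustration only, not part of the generated q.out. The hunk above replaces the old fallback ("Vector aggregate not implemented: sum for type: TIMESTAMP") with a vectorized VectorUDAFSumTimestamp(col 0) -> double. A sketch of the underlying idea, assuming each timestamp is accumulated as seconds since the epoch with a fractional sub-second part; the helper below is hypothetical, not Hive's implementation.]

import java.sql.Timestamp;

public class SumTimestampSketch {

  // Seconds since the epoch as a double, keeping the sub-millisecond nanos
  // that java.sql.Timestamp.getTime() drops.
  static double toSeconds(Timestamp ts) {
    return ts.getTime() / 1000.0 + (ts.getNanos() % 1_000_000) / 1e9;
  }

  public static void main(String[] args) {
    double sum = 0.0;
    for (String s : new String[] {"2020-01-01 00:00:00", "2020-01-01 00:00:01.5"}) {
      sum += toSeconds(Timestamp.valueOf(s)); // the vectorized aggregator does this per batch
    }
    System.out.println(sum); // sum of the two timestamps, in seconds
  }
}
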
native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap @@ -1075,30 +1107,48 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFVarPopFinal(col 1) -> double, VectorUDAFVarPopFinal(col 2) -> double, VectorUDAFVarSampFinal(col 3) -> double, VectorUDAFStdPopFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double, VectorUDAFStdPopFinal(col 6) -> double, VectorUDAFStdSampFinal(col 7) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 
672 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 9, 10, 11, 12, 13, 14, 15] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 0) -> 8:double, VectorUDFAdaptor(_col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 9:boolean, VectorUDFAdaptor(_col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 10:boolean, VectorUDFAdaptor(_col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 3) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 3) -> 14:double, RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 3) -> 15:double Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_between_in.q.out ql/src/test/results/clientpositive/spark/vector_between_in.q.out index 9329ba7..2f87841 100644 --- ql/src/test/results/clientpositive/spark/vector_between_in.q.out +++ ql/src/test/results/clientpositive/spark/vector_between_in.q.out @@ -151,8 +151,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -189,8 +191,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -351,8 +355,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -389,8 +395,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -739,8 +747,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -777,8 +787,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: 
VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1087,9 +1099,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1129,9 +1143,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial @@ -1223,9 +1239,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1265,9 +1283,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial @@ -1359,9 +1379,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1401,9 +1423,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial @@ -1495,9 +1519,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 5:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 4 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: boolean) mode: hash @@ -1537,9 +1563,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index 0aa347b..c69bc81 100644 --- ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -144,13 +144,14 @@ STAGE PLANS: Group By Operator aggregations: avg(50), avg(50.0), avg(50) Group By Vectorization: - aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, 
VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -159,6 +160,10 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) @@ -166,20 +171,32 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 1) -> double, VectorUDAFAvgFinal(col 2) -> double, VectorUDAFAvgDecimalFinal(col 3) -> decimal(16,4) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -187,6 +204,10 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) diff --git ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out index b663831..9af0786 100644 --- ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out +++ ql/src/test/results/clientpositive/spark/vector_count_distinct.q.out @@ -1266,9 +1266,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 16 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ws_order_number (type: int) mode: hash @@ -1305,9 +1307,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -1318,8 +1322,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -1347,8 +1353,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index edda919..9994f2b 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -70,9 +70,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) mode: hash @@ -112,9 +114,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFCountMerge(col 5) -> bigint, VectorUDAFMaxDecimal(col 6) -> decimal(23,14), VectorUDAFMinDecimal(col 7) -> decimal(23,14), VectorUDAFSumDecimal(col 8) -> decimal(38,18), VectorUDAFCountMerge(col 9) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: KEY._col0 (type: int) mode: mergepartial @@ -226,13 +230,14 @@ STAGE PLANS: Group By Operator aggregations: count(cdecimal1), 
max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -241,39 +246,66 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: 
struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col4] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(20,10), VectorUDAFMinDecimal(col 3) -> decimal(20,10), VectorUDAFSumDecimal(col 4) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 5) -> decimal(34,14), VectorUDAFStdPopFinal(col 6) -> double, VectorUDAFStdSampFinal(col 7) -> double, VectorUDAFCountMerge(col 8) -> bigint, VectorUDAFMaxDecimal(col 9) -> decimal(23,14), VectorUDAFMinDecimal(col 10) -> decimal(23,14), VectorUDAFSumDecimal(col 11) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 12) -> decimal(37,18), VectorUDAFStdPopFinal(col 13) -> double, VectorUDAFStdSampFinal(col 14) -> double, VectorUDAFCountMerge(col 15) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 6144 Data size: 1082530 Basic stats: COMPLETE Column stats: NONE Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 15, val 1) -> boolean predicate: (_col15 > 1) (type: boolean) Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink 
Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 360843 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out index 59dcf7c..aff53a6 100644 --- ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/spark/vector_distinct_2.q.out @@ -141,9 +141,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 8 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash @@ -180,9 +182,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out index 94b3ef6..83f8604 100644 --- ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/spark/vector_groupby_3.q.out @@ -143,9 +143,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 8 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash @@ -185,9 +187,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 2) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0, col 1 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 3a9f97b..62383c4 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -238,9 +238,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out index 2f2609f..433b9a2 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -91,9 +91,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int) mode: hash @@ -142,9 +144,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + 
vectorProcessingMode: HASH projectedOutputColumns: [] keys: l_partkey (type: int) mode: hash @@ -183,9 +187,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial @@ -362,9 +368,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: int), _col1 (type: int) mode: hash @@ -413,9 +421,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: l_partkey (type: int) mode: hash @@ -454,9 +464,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out index fd3469c..dc394c8 100644 --- ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out +++ ql/src/test/results/clientpositive/spark/vector_orderby_5.q.out @@ -144,9 +144,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 7 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: bo (type: boolean) mode: hash @@ -186,9 +188,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [0] keys: KEY._col0 (type: boolean) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index 03e3a47..5554788 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -817,8 +817,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -870,8 +872,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index c91f175..8ca54f9 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -398,8 
+398,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -451,8 +453,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/spark/vector_string_concat.q.out ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index b361ec0..17c79a5 100644 --- ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -354,9 +354,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 19 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string) mode: hash @@ -394,9 +396,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: MERGE_PARTIAL projectedOutputColumns: [] keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/spark/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/spark/vector_tablesample_rows.q.out new file mode 100644 index 0000000..734cf63 --- /dev/null +++ ql/src/test/results/clientpositive/spark/vector_tablesample_rows.q.out @@ -0,0 +1,424 @@ +PREHOOK: query: explain vectorization detail +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 'key1' (type: string), 'value1' (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12, 13] + selectExpressions: ConstantVectorExpression(val key1) -> 12:string, ConstantVectorExpression(val value1) -> 13:string + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12288 Data size: 2187264 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: string, string + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: select 'key1', 'value1' from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +_c0 _c1 +key1 value1 +PREHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain vectorization detail +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Row Limit Per Split: 1 + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [12] + selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9) + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(18,9) + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat + serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde + name: default.decimal_2 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +_col0 +PREHOOK: query: select count(*) from decimal_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from decimal_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: explain vectorization detail +select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 1 (type: int) + sort order: + + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Map Vectorization: + enabled: false +#### A masked pattern was here #### + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: bigint + Reduce Operator Tree: + Select Operator + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [] + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + Group By Vectorization: + aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 1:long) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from (select * from (Select 1 a) x order by x.a) y +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +_c0 +1 +PREHOOK: query: explain vectorization detail +create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: query: explain vectorization detail +create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-0 + Stage-2 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table 
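
The rowBatchContext sections in these plans make the vectorized batch layout explicit: scratch columns that hold expression results are appended after the data and partition columns, which is why the constant expressions in vector_tablesample_rows.q.out above land in columns 12 and 13 of a 12-column alltypesorc scan. A minimal sketch of that index arithmetic follows; the helper names are assumptions for illustration, only the layout rule is taken from the output.

// Illustrative only: the batch column layout implied by rowBatchContext.
// Vectorized expressions write into scratch columns appended after the
// data and partition columns; the class and method names are hypothetical.
public final class ScratchColumnLayout {

  static int firstScratchColumn(int dataColumnCount, int partitionColumnCount) {
    return dataColumnCount + partitionColumnCount;
  }

  public static void main(String[] args) {
    // vector_tablesample_rows above: dataColumnCount: 12,
    // partitionColumnCount: 0, scratchColumnTypeNames: string, string
    int base = firstScratchColumn(12, 0);
    // matches "ConstantVectorExpression(val key1) -> 12:string" and
    // "ConstantVectorExpression(val value1) -> 13:string"
    System.out.println("scratch string columns: " + base + ", " + (base + 1));
  }
}
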
+ Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + Map Vectorization: + enabled: false +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-3 + Create Table Operator: + Create Table + columns: _c0 int + input format: org.apache.hadoop.mapred.TextInputFormat +#### A masked pattern was here #### + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde name: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dual + isTemporary: true + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: create temporary table dual as select 1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@dual +POSTHOOK: query: create temporary table dual as select 1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dual +_c0 +PREHOOK: query: select * from dual +PREHOOK: type: QUERY +PREHOOK: Input: default@dual +#### A masked pattern was here #### +POSTHOOK: query: select * from dual +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dual +#### A masked pattern was here #### +dual._c0 +1 diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index 3f3c664..9c39b33 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -6,7 +6,7 @@ SELECT MIN(ctinyint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(ctinyint) as c1, MAX(ctinyint), COUNT(ctinyint), @@ -51,8 +51,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -61,8 +63,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: 
_col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -74,23 +78,36 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:tinyint, VALUE._col1:tinyint, VALUE._col2:bigint, VALUE._col3:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFMaxLong(col 1) -> tinyint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -100,8 +117,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -109,10 +128,16 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:tinyint, VALUE._col0:tinyint, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -158,12 +183,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64 62 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(ctinyint) as c1 FROM alltypesorc ORDER BY c1 @@ -205,8 +230,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH 
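
Every vectorized Group By in these hunks now reports both the logical groupByMode and the new vectorProcessingMode, and the pairings follow a fixed rule: hash-mode aggregations run in HASH processing mode, while mergepartial becomes MERGE_PARTIAL when grouping keys are present (as in vector_string_concat.q.out above) and collapses to GLOBAL when there are none (as in these vectorization_0.q.out hunks). A minimal sketch of that mapping, under the assumption that the planner derives it from the logical mode plus key presence; the names here are illustrative, not the patch's actual API.

// Illustrative only: reconstructs the groupByMode -> vectorProcessingMode
// pairing visible in the explain output. All names here are assumptions.
public final class GroupByModeMapping {

  enum VectorProcessingMode { HASH, MERGE_PARTIAL, GLOBAL }

  static VectorProcessingMode processingModeFor(String groupByMode, boolean hasKeys) {
    switch (groupByMode) {
      case "HASH":
        // map-side hash aggregation ("mode: hash"), with or without keys
        return VectorProcessingMode.HASH;
      case "MERGEPARTIAL":
        // reduce-side merge: keyed merges stay MERGE_PARTIAL,
        // key-less merges collapse to a single GLOBAL aggregate
        return hasKeys ? VectorProcessingMode.MERGE_PARTIAL
                       : VectorProcessingMode.GLOBAL;
      default:
        throw new IllegalArgumentException("unexpected mode: " + groupByMode);
    }
  }

  public static void main(String[] args) {
    // The three pairings that appear in the q.out hunks:
    System.out.println(processingModeFor("HASH", false));         // HASH
    System.out.println(processingModeFor("MERGEPARTIAL", true));  // MERGE_PARTIAL
    System.out.println(processingModeFor("MERGEPARTIAL", false)); // GLOBAL
  }
}
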
projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -215,8 +242,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -228,23 +257,36 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -254,18 +296,26 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) @@ -369,17 +419,20 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + 
groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -448,7 +501,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -456,7 +509,7 @@ SELECT MIN(cbigint) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), COUNT(cbigint), @@ -501,8 +554,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 3) -> bigint, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -511,8 +566,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -524,23 +581,36 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint, VALUE._col3:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -550,8 +620,10 @@ STAGE PLANS: sort order: + 
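
The hunk just above is the point of the patch: the reducer that was previously rejected with "Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported" is now vectorized, because the struct-typed partial results (struct<count:bigint,sum:double,input:bigint>, visible in the aggregators further down in this file) can be consumed by VectorUDAFAvgFinal. A rough model of that partial buffer and its final step, with simplified semantics: the "input" field, which appears to carry source-type information for the final object inspector, is omitted, and this is not Hive's actual evaluator.

// Illustrative only: models AVG's struct-typed partial result and the
// struct -> primitive final step that VectorUDAFAvgFinal performs.
public final class AvgPartialSketch {

  static final class AvgPartial {
    long count;  // rows aggregated so far
    double sum;  // running sum

    void add(double value) { sum += value; count++; }

    // MERGEPARTIAL: combine partials produced by the map side.
    void merge(AvgPartial other) { count += other.count; sum += other.sum; }

    // GLOBAL/FINAL: struct partial -> primitive double.
    // Hive returns NULL for an empty group; modeled here as null.
    Double finish() { return count == 0 ? null : sum / count; }
  }

  public static void main(String[] args) {
    AvgPartial a = new AvgPartial();
    a.add(2.0); a.add(4.0);
    AvgPartial b = new AvgPartial();
    b.add(6.0);
    a.merge(b);
    System.out.println(a.finish()); // 4.0
  }
}
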
Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [1, 2, 3] Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -559,10 +631,16 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -608,12 +686,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2147311592 2145498388 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cbigint) as c1 FROM alltypesorc ORDER BY c1 @@ -655,8 +733,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -665,8 +745,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -678,23 +760,36 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [3] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint 
className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -704,18 +799,26 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) @@ -819,17 +922,20 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -898,7 +1004,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -906,7 +1012,7 @@ SELECT MIN(cfloat) as c1, FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), COUNT(cfloat), @@ -951,8 +1057,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinDouble(col 4) -> float, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFCount(col 4) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -961,8 +1069,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -974,23 +1084,36 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [4] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: VALUE._col0:float, VALUE._col1:float, VALUE._col2:bigint, VALUE._col3:bigint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) Group By Vectorization: aggregators: VectorUDAFMinDouble(col 0) -> float, VectorUDAFMaxDouble(col 1) -> float, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 @@ -1000,8 +1123,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [1, 2, 3] Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1009,10 +1134,16 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:float, VALUE._col0:float, VALUE._col1:bigint, VALUE._col2:bigint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) @@ -1058,12 +1189,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -64.0 79.553 9173 12288 -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION 
EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT SUM(cfloat) as c1 FROM alltypesorc ORDER BY c1 @@ -1105,8 +1236,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumDouble(col 4) -> double className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -1115,8 +1248,10 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1128,23 +1263,36 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [4] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:double + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: aggregators: VectorUDAFSumDouble(col 0) -> double className: VectorGroupByOperator + groupByMode: MERGEPARTIAL vectorOutput: true native: false + vectorProcessingMode: GLOBAL projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 @@ -1154,18 +1302,26 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:double + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: double) @@ -1269,17 +1425,20 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: 
vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) @@ -1349,7 +1508,7 @@ POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 WARNING: Comparing a bigint and a double may result in a loss of precision. -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -1376,7 +1535,7 @@ WHERE (((cstring2 LIKE '%b%') AND ((cboolean2 = 1) AND (3569 = ctinyint)))) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), @@ -1444,46 +1603,84 @@ STAGE PLANS: Group By Operator aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint + aggregators: VectorUDAFAvgLong(col 3) -> struct, VectorUDAFStdPopLong(col 3) -> struct, VectorUDAFVarSampLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 0) -> tinyint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4, 5] Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(13,3), double Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: VALUE._col0:struct, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:double, VALUE._col5:tinyint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFVarSampFinal(col 2) -> double, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMinLong(col 5) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0 + _col0) (type: double), _col1 (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) + (-6432.0 + _col0)) (type: double), _col2 (type: double), (- (-6432.0 + _col0)) (type: double), (-6432.0 + (- (-6432.0 + _col0))) (type: double), (- (-6432.0 + _col0)) (type: double), ((- (-6432.0 + _col0)) / (- (-6432.0 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 7, 1, 9, 11, 2, 10, 8, 13, 12, 3, 4, 14, 15, 18, 5, 19] + selectExpressions: DoubleColUnaryMinus(col 0) -> 6:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 7:double, DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 9:double, DoubleColAddDoubleColumn(col 10, col 8)(children: DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 11:double, DoubleColUnaryMinus(col 8)(children: 
DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 10:double, DoubleScalarAddDoubleColumn(val -6432.0, col 12)(children: DoubleColUnaryMinus(col 8)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 8:double) -> 12:double) -> 8:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColDivideDoubleColumn(col 14, col 15)(children: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 12:double) -> 15:double) -> 12:double, DoubleColModuloDoubleColumn(col 2, col 1) -> 14:double, DoubleColUnaryMinus(col 2) -> 15:double, DoubleColMultiplyDoubleColumn(col 17, col 16)(children: DoubleColUnaryMinus(col 16)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 0) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 0) -> 16:double) -> 18:double, LongColUnaryMinus(col 5) -> 19:long Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out ql/src/test/results/clientpositive/spark/vectorization_1.q.out index e0a4344..78f1517 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -1,3 +1,176 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT VAR_POP(ctinyint), + (VAR_POP(ctinyint) / -26.28), + SUM(cfloat), + (-1.389 + SUM(cfloat)), + (SUM(cfloat) * (-1.389 + SUM(cfloat))), + MAX(ctinyint), + (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))), + MAX(cint), + (MAX(cint) * 79.553), + VAR_SAMP(cdouble), + (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))), + COUNT(cint), + (-563 % MAX(cint)) +FROM alltypesorc +WHERE (((cdouble > ctinyint) + AND (cboolean2 > 0)) + OR ((cbigint < ctinyint) + OR ((cint > cbigint) + OR (cboolean1 < 0)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT VAR_POP(ctinyint), + (VAR_POP(ctinyint) / -26.28), + SUM(cfloat), + (-1.389 + SUM(cfloat)), + (SUM(cfloat) * (-1.389 + SUM(cfloat))), + MAX(ctinyint), + (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))), + MAX(cint), + (MAX(cint) * 79.553), + VAR_SAMP(cdouble), + (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))), + COUNT(cint), + (-563 % MAX(cint)) +FROM alltypesorc +WHERE (((cdouble > ctinyint) + AND (cboolean2 > 0)) + OR ((cbigint < ctinyint) + OR ((cint > cbigint) + OR (cboolean1 < 0)))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: 
FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterLongColGreaterLongScalar(col 11, val 0) -> boolean) -> boolean, FilterLongColLessLongColumn(col 3, col 0)(children: col 0) -> boolean, FilterLongColGreaterLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean) -> boolean + predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) + outputColumnNames: ctinyint, cint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 4, 5] + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2, 3, 4, 5, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: 
VALUE._col0:struct, VALUE._col1:double, VALUE._col2:tinyint, VALUE._col3:int, VALUE._col4:struct, VALUE._col5:bigint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFVarPopFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFMaxLong(col 2) -> tinyint, VectorUDAFMaxLong(col 3) -> int, VectorUDAFVarSampFinal(col 4) -> double, VectorUDAFCountMerge(col 5) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 1, 7, 9, 2, 8, 3, 12, 4, 13, 5, 14] + selectExpressions: DoubleColDivideDoubleScalar(col 0, val -26.28) -> 6:double, DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 7:double, DoubleColMultiplyDoubleColumn(col 1, col 8)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 8:double) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColMultiplyDoubleColumn(col 1, col 8)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 8:double) -> 10:double) -> 8:double, DecimalColMultiplyDecimalScalar(col 11, val 79.553)(children: CastLongToDecimal(col 3) -> 11:decimal(10,0)) -> 12:decimal(16,3), DoubleScalarModuloDoubleColumn(val 10.175, col 10)(children: DoubleColUnaryMinus(col 13)(children: DoubleColMultiplyDoubleColumn(col 1, col 10)(children: DoubleScalarAddDoubleColumn(val -1.389, col 1) -> 10:double) -> 13:double) -> 10:double) -> 13:double, LongScalarModuloLongColumn(val -563, col 3) -> 14:long + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT VAR_POP(ctinyint), (VAR_POP(ctinyint) / -26.28), SUM(cfloat), diff --git ql/src/test/results/clientpositive/spark/vectorization_10.q.out ql/src/test/results/clientpositive/spark/vectorization_10.q.out index 9dad4c4..4e9cce3 100644 --- ql/src/test/results/clientpositive/spark/vectorization_10.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_10.q.out @@ -1,3 +1,120 @@ +PREHOOK: query: EXPLAIN 
VECTORIZATION DETAIL +SELECT cdouble, + ctimestamp1, + ctinyint, + cboolean1, + cstring1, + (-(cdouble)), + (cdouble + csmallint), + ((cdouble + csmallint) % 33), + (-(cdouble)), + (ctinyint % cdouble), + (ctinyint % csmallint), + (-(cdouble)), + (cbigint * (ctinyint % csmallint)), + (9763215.5639 - (cdouble + csmallint)), + (-((-(cdouble)))) +FROM alltypesorc +WHERE (((cstring2 <= '10') + OR ((ctinyint > cdouble) + AND (-5638.15 >= ctinyint))) + OR ((cdouble > 6981) + AND ((csmallint = 9763215.5639) + OR (cstring1 LIKE '%a')))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cdouble, + ctimestamp1, + ctinyint, + cboolean1, + cstring1, + (-(cdouble)), + (cdouble + csmallint), + ((cdouble + csmallint) % 33), + (-(cdouble)), + (ctinyint % cdouble), + (ctinyint % csmallint), + (-(cdouble)), + (cbigint * (ctinyint % csmallint)), + (9763215.5639 - (cdouble + csmallint)), + (-((-(cdouble)))) +FROM alltypesorc +WHERE (((cstring2 <= '10') + OR ((ctinyint > cdouble) + AND (-5638.15 >= ctinyint))) + OR ((cdouble > 6981) + AND ((csmallint = 9763215.5639) + OR (cstring1 LIKE '%a')))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + Statistics: Num rows: 5461 Data size: 167650 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double + Statistics: Num rows: 5461 Data size: 167650 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 5461 Data size: 167650 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 3, 5, 6, 7, 8, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, decimal(6,2), decimal(11,4), double, double, double, double, double, bigint, bigint, bigint, double, double, double + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT cdouble, ctimestamp1, ctinyint, diff --git ql/src/test/results/clientpositive/spark/vectorization_11.q.out ql/src/test/results/clientpositive/spark/vectorization_11.q.out index dff58da..f79c3a0 100644 --- ql/src/test/results/clientpositive/spark/vectorization_11.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_11.q.out @@ -1,3 +1,102 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cstring1, + cboolean1, + cdouble, + ctimestamp1, + (-3728 * csmallint), + (cdouble - 9763215.5639), + (-(cdouble)), + ((-(cdouble)) + 6981), + (cdouble * -5638.15) +FROM alltypesorc +WHERE ((cstring2 = cstring1) + OR ((ctimestamp1 IS NULL) + AND (cstring1 LIKE '%a'))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cstring1, + cboolean1, + cdouble, + ctimestamp1, + (-3728 * csmallint), + (cdouble - 9763215.5639), + (-(cdouble)), + ((-(cdouble)) + 6981), + (cdouble * -5638.15) +FROM alltypesorc +WHERE ((cstring2 = cstring1) + OR ((ctimestamp1 IS 
NULL) + AND (cstring1 LIKE '%a'))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7, col 6) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean) + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0) (type: double), (cdouble * -5638.15) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [6, 10, 5, 8, 12, 13, 14, 16, 15] + selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1)(children: col 1) -> 12:long, DoubleColSubtractDoubleScalar(col 5, val 9763215.5639) -> 13:double, DoubleColUnaryMinus(col 5) -> 14:double, DoubleColAddDoubleScalar(col 15, val 6981.0)(children: DoubleColUnaryMinus(col 5) -> 15:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5, val -5638.15) -> 15:double + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [1, 5, 6, 7, 8, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, double, double, double, double + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT cstring1, cboolean1, cdouble, diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out 
ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 6a7f69c..c17043b 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -1,3 +1,242 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cbigint, + cboolean1, + cstring1, + ctimestamp1, + cdouble, + (-6432 * cdouble), + (-(cbigint)), + COUNT(cbigint), + (cbigint * COUNT(cbigint)), + STDDEV_SAMP(cbigint), + ((-6432 * cdouble) / -6432), + (-(((-6432 * cdouble) / -6432))), + AVG(cdouble), + (-((-6432 * cdouble))), + (-5638.15 + cbigint), + SUM(cbigint), + (AVG(cdouble) / (-6432 * cdouble)), + AVG(cdouble), + (-((-(((-6432 * cdouble) / -6432))))), + (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))), + STDDEV_POP(cdouble) +FROM alltypesorc +WHERE (((ctimestamp1 IS NULL) + AND ((cboolean1 >= cboolean2) + OR (ctinyint != csmallint))) + AND ((cstring1 LIKE '%a') + OR ((cboolean2 <= 1) + AND (cbigint >= csmallint)))) +GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble +ORDER BY ctimestamp1, cdouble, cbigint, cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cbigint, + cboolean1, + cstring1, + ctimestamp1, + cdouble, + (-6432 * cdouble), + (-(cbigint)), + COUNT(cbigint), + (cbigint * COUNT(cbigint)), + STDDEV_SAMP(cbigint), + ((-6432 * cdouble) / -6432), + (-(((-6432 * cdouble) / -6432))), + AVG(cdouble), + (-((-6432 * cdouble))), + (-5638.15 + cbigint), + SUM(cbigint), + (AVG(cdouble) / (-6432 * cdouble)), + AVG(cdouble), + (-((-(((-6432 * cdouble) / -6432))))), + (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))), + STDDEV_POP(cdouble) +FROM alltypesorc +WHERE (((ctimestamp1 IS NULL) + AND ((cboolean1 >= cboolean2) + OR (ctinyint != csmallint))) + AND ((cstring1 LIKE '%a') + OR ((cboolean2 <= 1) + AND (cbigint >= csmallint)))) +GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble +ORDER BY ctimestamp1, cdouble, cbigint, cstring1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10, col 11) -> boolean, FilterLongColNotEqualLongColumn(col 0, col 1)(children: col 0) -> boolean) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11, val 1) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 1)(children: col 1) -> boolean) -> boolean) -> boolean) -> boolean + predicate: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean) + Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) + outputColumnNames: cbigint, cdouble, cstring1, cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 5, 6, 10] + Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 3) -> bigint, VectorUDAFStdSampLong(col 3) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 5, col 3, col 6, col 10 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4] + keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) + sort order: ++++ + Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0, 1, 2, 3] + valueColumns: [4, 5, 6, 7, 8] + Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE + value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 9 + dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), 
avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 4) -> bigint, VectorUDAFStdSampFinal(col 5) -> double, VectorUDAFAvgFinal(col 6) -> double, VectorUDAFSumLong(col 7) -> bigint, VectorUDAFStdPopFinal(col 8) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4] + keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8] + selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 9:double, LongColUnaryMinus(col 1) -> 10:long, LongColMultiplyLongColumn(col 1, col 4) -> 11:long, DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14)(children: DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16)(children: CastLongToDecimal(col 1) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6, col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14)(children: DoubleColUnaryMinus(col 19)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20, col 21)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 21:double) -> 14:double + Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: 
VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8] + Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) + Reducer 3 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 19 + dataColumns: KEY.reducesinkkey0:double, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:string, VALUE._col0:boolean, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:bigint, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(22,2), VALUE._col11:bigint, VALUE._col12:double, VALUE._col13:double, VALUE._col14:double, VALUE._col15:double + partitionColumnCount: 0 + scratchColumnTypeNames: timestamp + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: string), null (type: timestamp), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(22,2)), VALUE._col11 (type: bigint), VALUE._col12 (type: double), VALUE._col8 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18] + selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp + Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT cbigint, cboolean1, cstring1, diff --git 
ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index 9a4c6c3..bae304b 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16 LIMIT 40 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, ctinyint, ctimestamp1, @@ -108,11 +108,12 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 10, col 0, col 8, col 4, col 6 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -121,26 +122,57 @@ STAGE PLANS: key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0, 1, 2, 3, 4] + valueColumns: [5, 6, 7, 8, 9, 10] Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 4, 5, 6, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, decimal(11,4) 
Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:float, VALUE._col5:tinyint + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5) -> tinyint, VectorUDAFSumDouble(col 6) -> double, VectorUDAFStdPopFinal(col 7) -> double, VectorUDAFStdPopFinal(col 8) -> double, VectorUDAFMaxDouble(col 9) -> float, VectorUDAFMinLong(col 10) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3, col 4 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -148,10 +180,21 @@ STAGE PLANS: Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] + selectExpressions: LongColUnaryMinus(col 1) -> 11:long, LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 13:long, DoubleColMultiplyDoubleColumn(col 6, col 15)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3) -> 17:double, DoubleColUnaryMinus(col 6) -> 18:double, DecimalColSubtractDecimalScalar(col 19, val 10.175)(children: CastLongToDecimal(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 
12:long) -> 14:long) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23)(children: DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24, col 23)(children: DoubleColMultiplyDoubleColumn(col 6, col 23)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 23:double) -> 24:double, CastLongToDouble(col 1) -> 23:double) -> 25:double Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -159,10 +202,16 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaaaaaaaaaaaaaaaaaaa + reduceColumnSortOrder: +++++++++++++++++++++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 21 + dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:tinyint, KEY.reducesinkkey2:timestamp, KEY.reducesinkkey3:float, KEY.reducesinkkey4:string, KEY.reducesinkkey5:tinyint, KEY.reducesinkkey6:tinyint, KEY.reducesinkkey7:tinyint, KEY.reducesinkkey8:double, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:double, KEY.reducesinkkey13:double, KEY.reducesinkkey14:double, KEY.reducesinkkey15:decimal(7,3), KEY.reducesinkkey16:double, KEY.reducesinkkey17:double, KEY.reducesinkkey18:float, KEY.reducesinkkey19:double, KEY.reducesinkkey20:tinyint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), 
KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) @@ -413,11 +462,12 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 10, col 0, col 8, col 4, col 6 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -426,26 +476,42 @@ STAGE PLANS: key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5) -> tinyint, VectorUDAFSumDouble(col 6) -> double, VectorUDAFStdPopFinal(col 7) -> double, VectorUDAFStdPopFinal(col 8) -> double, VectorUDAFMaxDouble(col 9) -> float, VectorUDAFMinLong(col 10) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3, col 4 + native: false + vectorProcessingMode: MERGE_PARTIAL + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -453,10 +519,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10] + selectExpressions: LongColUnaryMinus(col 1) -> 11:long, LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 13:long, DoubleColMultiplyDoubleColumn(col 6, col 15)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3) -> 17:double, DoubleColUnaryMinus(col 6) -> 18:double, DecimalColSubtractDecimalScalar(col 19, val 10.175)(children: CastLongToDecimal(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23)(children: DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24, col 23)(children: DoubleColMultiplyDoubleColumn(col 6, col 23)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 23:double) -> 24:double, CastLongToDouble(col 1) -> 23:double) -> 25:double Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index 1541908..9d52abe 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cfloat, cstring1, @@ -31,7 +31,7 @@ WHERE (((ctinyint <= cbigint) GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble ORDER BY cstring1, cfloat, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cfloat, cstring1, @@ -85,15 +85,36 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterTimestampColLessTimestampColumn(col 9, col 8) -> boolean) -> boolean, FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3, val -257) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean) Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [8, 4, 6, 10, 5, 13] + selectExpressions: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5) -> 12:double) -> 13:double Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + Group By Vectorization: + aggregators: VectorUDAFStdSampDouble(col 13) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFVarSampDouble(col 4) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 6, col 4, col 5, col 8, col 10 + native: false + vectorProcessingMode: HASH + 
projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -102,26 +123,57 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0, 1, 2, 3, 4] + valueColumns: [5, 6, 7, 8, 9, 10] Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, double Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:struct, VALUE._col1:float, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:struct, VALUE._col5:struct + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFStdSampFinal(col 5) -> double, VectorUDAFMaxDouble(col 6) -> float, VectorUDAFStdPopFinal(col 7) -> double, VectorUDAFCountMerge(col 8) -> bigint, VectorUDAFVarPopFinal(col 9) -> double, VectorUDAFVarSampFinal(col 10) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2, col 3, col 4 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: string), KEY._col1 (type: 
float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -129,10 +181,21 @@ STAGE PLANS: Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 0, 4, 2, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22] + selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 11:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1, val -26.280000686645508) -> 12:double, DoubleColUnaryMinus(col 1) -> 14:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleColDivideDoubleScalar(col 17, val 10.175)(children: DoubleColUnaryMinus(col 16)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 16:double) -> 17:double) -> 16:double, DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleScalar(col 18, val 10.175)(children: DoubleColUnaryMinus(col 17)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 17:double) -> 18:double) -> 17:double) -> 18:double, DoubleScalarModuloDoubleColumn(val -1.389, col 5) -> 17:double, DoubleColSubtractDoubleColumn(col 1, col 2)(children: col 1) -> 19:double, DoubleColModuloDoubleScalar(col 9, val 10.175) -> 20:double, DoubleColUnaryMinus(col 21)(children: DoubleColSubtractDoubleColumn(col 1, col 2)(children: col 1) -> 21:double) -> 22:double Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22] Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: 
double), _col21 (type: double) Reducer 3 @@ -140,17 +203,30 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 22 + dataColumns: KEY.reducesinkkey0:string, KEY.reducesinkkey1:float, KEY.reducesinkkey2:double, KEY.reducesinkkey3:timestamp, VALUE._col0:boolean, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:float, VALUE._col5:float, VALUE._col6:float, VALUE._col7:float, VALUE._col8:double, VALUE._col9:double, VALUE._col10:bigint, VALUE._col11:double, VALUE._col12:double, VALUE._col13:double, VALUE._col14:double, VALUE._col15:double, VALUE._col16:double, VALUE._col17:double + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index b2d8aaf..cc9ae1d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cboolean1, cdouble, @@ -29,7 +29,7 @@ WHERE (((cstring2 LIKE '%ss%') GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cboolean1, cdouble, @@ -81,15 +81,35 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: 
FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean, FilterStringColLikeStringScalar(col 6, pattern 10%) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -75) -> boolean, FilterLongColEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 5, val -3728.0) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean) Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 4, 5, 6, 8, 10] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + Group By Vectorization: + aggregators: VectorUDAFStdSampDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 4, col 10, col 5, col 6, col 0, col 2, col 8 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -98,26 +118,43 @@ STAGE PLANS: key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2, 3, 4, 5, 6] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0, 1, 2, 3, 4, 5, 6] + valueColumns: [7, 8, 9, 10, 11, 12] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 
12 + includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + enabled: false + enableConditionsMet: hive.execution.engine spark IN [tez, spark] IS true + enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -132,14 +169,10 @@ STAGE PLANS: Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 - Execution mode: vectorized Reduce Vectorization: - enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + enabled: false + enableConditionsMet: hive.execution.engine spark IN [tez, spark] IS true + enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index e731c2d..d5235aa 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, 
ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -58,15 +58,35 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) outputColumnNames: cdouble, cstring1, ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 6, 8] Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 5, col 6, col 8 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -75,26 +95,56 @@ STAGE PLANS: key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) sort order: +++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0, 1, 2] + valueColumns: [3, 4, 5] Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [5, 6, 7, 8] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 + Execution mode: vectorized 
Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:double + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFStdSampFinal(col 4) -> double, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -102,9 +152,17 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4] + selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6) Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index 32d1c0b..9395a01 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cstring1, cint, @@ -22,7 +22,7 @@ WHERE (((cbigint > -23) OR (cfloat = cdouble)))) ORDER BY cbigint, cfloat PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN 
VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cfloat, cstring1, cint, @@ -66,16 +66,34 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean) Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17] + selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [3, 4] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17] Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized @@ -87,22 +105,41 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 8] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: KEY.reducesinkkey0:bigint, KEY.reducesinkkey1:float, VALUE._col0:string, VALUE._col1:int, VALUE._col2:timestamp, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:double, VALUE._col9:double, VALUE._col10:decimal(11,4), VALUE._col11:double + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out ql/src/test/results/clientpositive/spark/vectorization_2.q.out index 709a75f..a3c70bb 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -1,3 +1,180 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT AVG(csmallint), + (AVG(csmallint) % -563), + (AVG(csmallint) + 762), + SUM(cfloat), + VAR_POP(cbigint), + (-(VAR_POP(cbigint))), + (SUM(cfloat) - AVG(csmallint)), + COUNT(*), + (-((SUM(cfloat) - AVG(csmallint)))), + 
(VAR_POP(cbigint) - 762), + MIN(ctinyint), + ((-(VAR_POP(cbigint))) + MIN(ctinyint)), + AVG(cdouble), + (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat)) +FROM alltypesorc +WHERE (((ctimestamp1 < ctimestamp2) + AND ((cstring2 LIKE 'b%') + AND (cfloat <= -5638.15))) + OR ((cdouble < ctinyint) + AND ((-10669 != ctimestamp2) + OR (359 > cint)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT AVG(csmallint), + (AVG(csmallint) % -563), + (AVG(csmallint) + 762), + SUM(cfloat), + VAR_POP(cbigint), + (-(VAR_POP(cbigint))), + (SUM(cfloat) - AVG(csmallint)), + COUNT(*), + (-((SUM(cfloat) - AVG(csmallint)))), + (VAR_POP(cbigint) - 762), + MIN(ctinyint), + ((-(VAR_POP(cbigint))) + MIN(ctinyint)), + AVG(cdouble), + (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat)) +FROM alltypesorc +WHERE (((ctimestamp1 < ctimestamp2) + AND ((cstring2 LIKE 'b%') + AND (cfloat <= -5638.15))) + OR ((cdouble < ctinyint) + AND ((-10669 != ctimestamp2) + OR (359 > cint)))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8, col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern b%) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarNotEqualDoubleColumn(val -10669.0, col 12)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterLongScalarGreaterLongColumn(val 359, col 2) -> boolean) -> boolean) -> boolean) -> boolean + predicate: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((-10669.0 <> UDFToDouble(ctimestamp2)) or (359 > cint)))) (type: boolean) + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double) + outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 3, 4, 5] + Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble) + Group By Vectorization: + aggregators: VectorUDAFAvgLong(col 1) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFAvgDouble(col 5) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + 
native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: VALUE._col0:struct, VALUE._col1:double, VALUE._col2:struct, VALUE._col3:bigint, VALUE._col4:tinyint, VALUE._col5:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFVarPopFinal(col 2) -> double, VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFMinLong(col 4) -> tinyint, VectorUDAFAvgFinal(col 5) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 7, 1, 2, 8, 9, 3, 11, 10, 4, 14, 5, 12] + selectExpressions: DoubleColModuloDoubleScalar(col 0, val -563.0) -> 6:double, DoubleColAddDoubleScalar(col 0, val 762.0) -> 7:double, DoubleColUnaryMinus(col 2) -> 8:double, DoubleColSubtractDoubleColumn(col 1, col 0) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColSubtractDoubleColumn(col 1, col 0) -> 10:double) -> 11:double, DoubleColSubtractDoubleScalar(col 2, val 762.0) -> 10:double, DoubleColAddDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 2) -> 12:double, CastLongToDouble(col 4) -> 13:double) -> 14:double, DoubleColSubtractDoubleColumn(col 15, col 1)(children: DoubleColAddDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 2) -> 12:double, CastLongToDouble(col 4) -> 13:double) -> 15:double) -> 12:double + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT AVG(csmallint), (AVG(csmallint) % -563), (AVG(csmallint) + 762), diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index 2398dee..a335c7d 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -1,4 +1,186 @@ WARNING: Comparing a bigint and a double may result in a loss of precision. 
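Context for the struct-typed VALUE columns in the plans above: before this change the reduce side fell back to row mode with "notVectorizedReason: ... Data type struct of Column[VALUE._col0] not supported", because AVG and VAR_POP ship their partial state from Map to Reduce as a STRUCT. As a hedged illustration only (field names and layout are assumptions, not the Hive source), such a partial result is described to the serialization layer with a standard struct ObjectInspector:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/** Illustrative sketch: an AVG-style partial result exposed as struct<count:bigint,sum:double>. */
public class AvgPartialStructSketch {
  static StructObjectInspector partialResultInspector() {
    List<ObjectInspector> fieldInspectors = new ArrayList<>();
    fieldInspectors.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);   // running row count
    fieldInspectors.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector); // running sum
    List<String> fieldNames = new ArrayList<>();
    fieldNames.add("count");
    fieldNames.add("sum");
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
  }
}

A vectorized reduce-side GROUP BY therefore has to accept struct-typed inputs such as the VALUE._col0:struct entries in the rowBatchContext above, which is exactly what these regenerated plans now exercise.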
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT STDDEV_SAMP(csmallint), + (STDDEV_SAMP(csmallint) - 10.175), + STDDEV_POP(ctinyint), + (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)), + (-(STDDEV_POP(ctinyint))), + (STDDEV_SAMP(csmallint) % 79.553), + (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))), + STDDEV_SAMP(cfloat), + (-(STDDEV_SAMP(csmallint))), + SUM(cfloat), + ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)), + (-((STDDEV_SAMP(csmallint) - 10.175))), + AVG(cint), + (-3728 - STDDEV_SAMP(csmallint)), + STDDEV_POP(cint), + (AVG(cint) / STDDEV_SAMP(cfloat)) +FROM alltypesorc +WHERE (((cint <= cfloat) + AND ((79.553 != cbigint) + AND (ctimestamp2 = -29071))) + OR ((cbigint > cdouble) + AND ((79.553 <= csmallint) + AND (ctimestamp1 > ctimestamp2)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT STDDEV_SAMP(csmallint), + (STDDEV_SAMP(csmallint) - 10.175), + STDDEV_POP(ctinyint), + (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)), + (-(STDDEV_POP(ctinyint))), + (STDDEV_SAMP(csmallint) % 79.553), + (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))), + STDDEV_SAMP(cfloat), + (-(STDDEV_SAMP(csmallint))), + SUM(cfloat), + ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)), + (-((STDDEV_SAMP(csmallint) - 10.175))), + AVG(cint), + (-3728 - STDDEV_SAMP(csmallint)), + STDDEV_POP(cint), + (AVG(cint) / STDDEV_SAMP(cfloat)) +FROM alltypesorc +WHERE (((cint <= cfloat) + AND ((79.553 != cbigint) + AND (ctimestamp2 = -29071))) + OR ((cbigint > cdouble) + AND ((79.553 <= csmallint) + AND (ctimestamp1 > ctimestamp2)))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -29071.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 1) -> 14:decimal(8,3)) -> boolean, FilterTimestampColGreaterTimestampColumn(col 8, col 9) -> boolean) -> boolean) -> boolean + predicate: (((UDFToFloat(cint) <= cfloat) and (79.553 <> CAST( cbigint AS decimal(22,3))) and (UDFToDouble(ctimestamp2) = -29071.0)) or ((UDFToDouble(cbigint) > cdouble) and (79.553 <= CAST( csmallint AS decimal(8,3))) and (ctimestamp1 > ctimestamp2))) (type: boolean) + Statistics: Num rows: 2503 Data 
size: 76841 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float) + outputColumnNames: ctinyint, csmallint, cint, cfloat + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 4] + Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint) + Group By Vectorization: + aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFStdSampDouble(col 4) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4, 5] + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, decimal(22,3), decimal(8,3) + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: VALUE._col0:struct, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:double, VALUE._col4:struct, VALUE._col5:struct + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFStdSampFinal(col 0) -> double, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFStdSampFinal(col 2) -> double, VectorUDAFSumDouble(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, 
VectorUDAFStdPopFinal(col 5) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 1, 8, 7, 9, 10, 2, 11, 3, 14, 13, 4, 12, 5, 15] + selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 6:double, DoubleColMultiplyDoubleColumn(col 0, col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 1) -> 7:double, DoubleColModuloDoubleScalar(col 0, val 79.553) -> 9:double, DoubleColUnaryMinus(col 11)(children: DoubleColMultiplyDoubleColumn(col 0, col 10)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 10:double) -> 11:double) -> 10:double, DoubleColUnaryMinus(col 0) -> 11:double, DoubleColDivideDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 13)(children: DoubleColMultiplyDoubleColumn(col 0, col 12)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 12:double) -> 13:double) -> 12:double, DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 12)(children: DoubleColSubtractDoubleScalar(col 0, val 10.175) -> 12:double) -> 13:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 0) -> 12:double, DoubleColDivideDoubleColumn(col 4, col 2) -> 15:double + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. 
PREHOOK: query: SELECT STDDEV_SAMP(csmallint), (STDDEV_SAMP(csmallint) - 10.175), STDDEV_POP(ctinyint), diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index 0d6829f..3d0e700 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -1,3 +1,179 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cint), + (SUM(cint) * -563), + (-3728 + SUM(cint)), + STDDEV_POP(cdouble), + (-(STDDEV_POP(cdouble))), + AVG(cdouble), + ((SUM(cint) * -563) % SUM(cint)), + (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)), + VAR_POP(cdouble), + (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))), + ((-3728 + SUM(cint)) - (SUM(cint) * -563)), + MIN(ctinyint), + MIN(ctinyint), + (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble))))) +FROM alltypesorc +WHERE (((csmallint >= cint) + OR ((-89010 >= ctinyint) + AND (cdouble > 79.553))) + OR ((-563 != cbigint) + AND ((ctinyint != cbigint) + OR (-3728 >= cdouble)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT SUM(cint), + (SUM(cint) * -563), + (-3728 + SUM(cint)), + STDDEV_POP(cdouble), + (-(STDDEV_POP(cdouble))), + AVG(cdouble), + ((SUM(cint) * -563) % SUM(cint)), + (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)), + VAR_POP(cdouble), + (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))), + ((-3728 + SUM(cint)) - (SUM(cint) * -563)), + MIN(ctinyint), + MIN(ctinyint), + (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble))))) +FROM alltypesorc +WHERE (((csmallint >= cint) + OR ((-89010 >= ctinyint) + AND (cdouble > 79.553))) + OR ((-563 != cbigint) + AND ((ctinyint != cbigint) + OR (-3728 >= cdouble)))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterLongScalarGreaterEqualLongColumn(val -89010, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterDoubleScalar(col 5, val 79.553) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -563, col 3) -> boolean, FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val -3728.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((UDFToInteger(csmallint) >= cint) or ((-89010 >= UDFToInteger(ctinyint)) and (cdouble > 79.553)) or ((-563 <> cbigint) and ((UDFToLong(ctinyint) <> cbigint) or (-3728.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double) + outputColumnNames: 
ctinyint, cint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 5] + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:struct, VALUE._col3:struct, VALUE._col4:tinyint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFStdPopFinal(col 1) -> double, VectorUDAFAvgFinal(col 2) -> double, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFMinLong(col 4) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: 
double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 6, 1, 7, 2, 9, 12, 3, 11, 14, 4, 4, 16] + selectExpressions: LongColMultiplyLongScalar(col 0, val -563) -> 5:long, LongScalarAddLongColumn(val -3728, col 0) -> 6:long, DoubleColUnaryMinus(col 1) -> 7:double, LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 9:long, DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 11:double) -> 12:double, DoubleColUnaryMinus(col 13)(children: DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 11:double) -> 13:double) -> 11:double, LongColSubtractLongColumn(col 8, col 10)(children: LongScalarAddLongColumn(val -3728, col 0) -> 8:long, LongColMultiplyLongScalar(col 0, val -563) -> 10:long) -> 14:long, DoubleColMultiplyDoubleColumn(col 13, col 15)(children: CastLongToDouble(col 4) -> 13:double, DoubleColUnaryMinus(col 16)(children: DoubleColDivideDoubleColumn(col 15, col 2)(children: CastLongToDouble(col 10)(children: LongColModuloLongColumn(col 8, col 0)(children: LongColMultiplyLongScalar(col 0, val -563) -> 8:long) -> 10:long) -> 15:double) -> 16:double) -> 15:double) -> 16:double + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(cint), (SUM(cint) * -563), (-3728 + SUM(cint)), diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index 914a626..2737d9b 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -1,3 +1,174 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MAX(csmallint), + (MAX(csmallint) * -75), + COUNT(*), + ((MAX(csmallint) * -75) / COUNT(*)), + (6981 * MAX(csmallint)), + MIN(csmallint), + (-(MIN(csmallint))), + (197 % ((MAX(csmallint) * -75) / COUNT(*))), + SUM(cint), + MAX(ctinyint), + (-(MAX(ctinyint))), + ((-(MAX(ctinyint))) + MAX(ctinyint)) +FROM alltypesorc +WHERE (((cboolean2 IS NOT NULL) + AND (cstring1 LIKE '%b%')) + OR ((ctinyint = cdouble) + AND ((ctimestamp2 IS NOT NULL) + AND (cstring2 LIKE 'a')))) 
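Besides the plan changes themselves, each of these .q.out files upgrades the statement from EXPLAIN VECTORIZATION (or EXPLAIN VECTORIZATION EXPRESSION) to EXPLAIN VECTORIZATION DETAIL; the DETAIL level is what emits the rowBatchContext, keyColumns/valueColumns, and selectExpressions annotations the new expected output asserts on. A throwaway JDBC probe along the following lines (connection URL, settings, and query are illustrative assumptions) prints the same plan text outside the qtest harness:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

/** Hypothetical harness: dump EXPLAIN VECTORIZATION DETAIL output for inspection. */
public class ExplainVectorizationProbe {
  public static void main(String[] args) throws Exception {
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      // Mirror the conditions the plans above report as "enabledConditionsMet".
      stmt.execute("SET hive.vectorized.execution.enabled=true");
      stmt.execute("SET hive.vectorized.execution.reduce.enabled=true");
      try (ResultSet rs = stmt.executeQuery(
          "EXPLAIN VECTORIZATION DETAIL "
              + "SELECT MAX(csmallint), COUNT(*) FROM alltypesorc")) {
        while (rs.next()) {
          System.out.println(rs.getString(1)); // one plan line per row, incl. rowBatchContext
        }
      }
    }
  }
}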
+PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MAX(csmallint), + (MAX(csmallint) * -75), + COUNT(*), + ((MAX(csmallint) * -75) / COUNT(*)), + (6981 * MAX(csmallint)), + MIN(csmallint), + (-(MIN(csmallint))), + (197 % ((MAX(csmallint) * -75) / COUNT(*))), + SUM(cint), + MAX(ctinyint), + (-(MAX(ctinyint))), + ((-(MAX(ctinyint))) + MAX(ctinyint)) +FROM alltypesorc +WHERE (((cboolean2 IS NOT NULL) + AND (cstring1 LIKE '%b%')) + OR ((ctinyint = cdouble) + AND ((ctimestamp2 IS NOT NULL) + AND (cstring2 LIKE 'a')))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %b%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern a) -> boolean) -> boolean) -> boolean + predicate: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean) + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) + outputColumnNames: ctinyint, csmallint, cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1) -> smallint, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + keyColumns: [] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [0, 1, 2, 3, 4] + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 
(type: bigint), _col4 (type: tinyint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 5, 6, 7, 9, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reducer 2 + Execution mode: vectorized + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: VALUE._col0:smallint, VALUE._col1:bigint, VALUE._col2:smallint, VALUE._col3:bigint, VALUE._col4:tinyint + partitionColumnCount: 0 + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> smallint, VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFMaxLong(col 4) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4] + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), (- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 9, 6, 2, 10, 7, 3, 4, 11, 14] + selectExpressions: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 5:long, DoubleColDivideDoubleColumn(col 7, col 8)(children: CastLongToDouble(col 6)(children: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 6:long) -> 7:double, CastLongToDouble(col 1) -> 8:double) -> 9:double, LongScalarMultiplyLongColumn(val 6981, col 0)(children: col 0) -> 6:long, LongColUnaryMinus(col 2) -> 10:long, DoubleScalarModuloDoubleColumn(val 197.0, col 12)(children: DoubleColDivideDoubleColumn(col 7, col 8)(children: CastLongToDouble(col 11)(children: LongColMultiplyLongScalar(col 0, val -75)(children: col 0) -> 11:long) -> 7:double, CastLongToDouble(col 1) -> 8:double) -> 12:double) -> 7:double, LongColUnaryMinus(col 4) -> 11:long, LongColAddLongColumn(col 13, col 4)(children: LongColUnaryMinus(col 4) -> 13:long) -> 14:long + 
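The rowBatchContext block above ties the logical plan to the physical batch layout: dataColumns and includeColumns name the table columns actually read, while scratchColumnTypeNames lists the extra vectors appended after them for intermediate expression results (here one double, used by CastLongToDouble in the filter). A minimal sketch of that layout, with an assumed two data columns plus one scratch column rather than this plan's twelve:

import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

/** Illustrative only: data columns first, scratch columns appended at the end. */
public class RowBatchContextSketch {
  static VectorizedRowBatch makeBatch() {
    int dataColumnCount = 2;    // e.g. csmallint, cint
    int scratchColumnCount = 1; // e.g. one double scratch vector
    VectorizedRowBatch batch = new VectorizedRowBatch(dataColumnCount + scratchColumnCount);
    batch.cols[0] = new LongColumnVector();   // csmallint (integer family)
    batch.cols[1] = new LongColumnVector();   // cint
    batch.cols[2] = new DoubleColumnVector(); // scratch: holds e.g. CastLongToDouble output
    return batch;
  }
}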
Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT MAX(csmallint), (MAX(csmallint) * -75), COUNT(*), diff --git ql/src/test/results/clientpositive/spark/vectorization_6.q.out ql/src/test/results/clientpositive/spark/vectorization_6.q.out index 13897f6..4906328 100644 --- ql/src/test/results/clientpositive/spark/vectorization_6.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_6.q.out @@ -1,3 +1,114 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cfloat, + cstring1, + (988888 * csmallint), + (-(csmallint)), + (-(cfloat)), + (-26.28 / cfloat), + (cfloat * 359), + (cint % ctinyint), + (-(cdouble)), + (ctinyint - -75), + (762 * (cint % ctinyint)) +FROM alltypesorc +WHERE ((ctinyint != 0) + AND ((((cboolean1 <= 0) + AND (cboolean2 >= cboolean1)) + OR ((cbigint IS NOT NULL) + AND ((cstring2 LIKE '%a') + OR (cfloat <= -257)))))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cfloat, + cstring1, + (988888 * csmallint), + (-(csmallint)), + (-(cfloat)), + (-26.28 / cfloat), + (cfloat * 359), + (cint % ctinyint), + (-(cdouble)), + (ctinyint - -75), + (762 * (cint % ctinyint)) +FROM alltypesorc +WHERE ((ctinyint != 0) + AND ((((cboolean1 <= 0) + AND (cboolean2 >= cboolean1)) + OR ((cbigint IS NOT NULL) + AND ((cstring2 LIKE '%a') + OR (cfloat <= -257)))))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10, val 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 11, col 10) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 3) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %a) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -257.0) -> boolean) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((ctinyint <> 0) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean) + Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: 
smallint), (- cfloat) (type: float), (-26.28 / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21] + selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1)(children: col 1) -> 12:long, LongColUnaryMinus(col 1) -> 13:long, DoubleColUnaryMinus(col 4) -> 14:double, DoubleScalarDivideDoubleColumn(val -26.28, col 4)(children: col 4) -> 15:double, DoubleColMultiplyDoubleScalar(col 4, val 359.0) -> 16:double, LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 17:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColSubtractLongScalar(col 0, val -75)(children: col 0) -> 19:long, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 20:long) -> 21:long + Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, double, double, double, bigint, double, bigint, bigint, bigint + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT cboolean1, cfloat, cstring1, diff --git ql/src/test/results/clientpositive/spark/vectorization_7.q.out ql/src/test/results/clientpositive/spark/vectorization_7.q.out index d2ff353..d49af0c 100644 --- ql/src/test/results/clientpositive/spark/vectorization_7.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_7.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -25,7 +25,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -96,8 +96,10 @@ STAGE PLANS: sort order: +++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [10, 3, 1, 0, 8, 6, 13, 14, 15, 16, 18, 19, 17, 20, 22] native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 7281 Data size: 223523 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized @@ -109,15 +111,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaaaaaaaaaaaaa + reduceColumnSortOrder: +++++++++++++++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 15 + dataColumns: KEY.reducesinkkey0:boolean, KEY.reducesinkkey1:bigint, KEY.reducesinkkey2:smallint, KEY.reducesinkkey3:tinyint, KEY.reducesinkkey4:timestamp, KEY.reducesinkkey5:string, KEY.reducesinkkey6:bigint, KEY.reducesinkkey7:int, KEY.reducesinkkey8:smallint, KEY.reducesinkkey9:tinyint, KEY.reducesinkkey10:int, KEY.reducesinkkey11:bigint, KEY.reducesinkkey12:int, KEY.reducesinkkey13:tinyint, KEY.reducesinkkey14:tinyint + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) diff --git ql/src/test/results/clientpositive/spark/vectorization_8.q.out ql/src/test/results/clientpositive/spark/vectorization_8.q.out index 927ee59..3eed209 100644 --- ql/src/test/results/clientpositive/spark/vectorization_8.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_8.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -23,7 +23,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -92,8 +92,10 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: [8, 5, 10, 6, 4, 12, 13, 14, 16, 18, 15, 17, 19, 21] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: [] Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized @@ -105,15 +107,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double, double, double, double, double, double, double, double Reducer 2 Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + reduceColumnNullOrder: aaaaaaaaaaaaaa + reduceColumnSortOrder: ++++++++++++++ groupByVectorOutput: true allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:double, KEY.reducesinkkey2:boolean, KEY.reducesinkkey3:string, KEY.reducesinkkey4:float, KEY.reducesinkkey5:double, KEY.reducesinkkey6:double, KEY.reducesinkkey7:double, KEY.reducesinkkey8:float, KEY.reducesinkkey9:double, KEY.reducesinkkey10:double, KEY.reducesinkkey11:float, KEY.reducesinkkey12:float, KEY.reducesinkkey13:double + partitionColumnCount: 0 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index e731c2d..d5235aa 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -58,15 +58,35 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') and 
((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) outputColumnNames: cdouble, cstring1, ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 6, 8] Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 5, col 6, col 8 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -75,26 +95,56 @@ STAGE PLANS: key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) sort order: +++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + partitionColumns: [0, 1, 2] + valueColumns: [3, 4, 5] Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [5, 6, 7, 8] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 6 + dataColumns: KEY._col0:double, KEY._col1:string, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:struct, VALUE._col2:double + partitionColumnCount: 0 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3) -> bigint, VectorUDAFStdSampFinal(col 4) -> double, VectorUDAFMinDouble(col 5) -> 
double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0, col 1, col 2 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2] keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -102,9 +152,17 @@ STAGE PLANS: Select Operator expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4] + selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6) Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out index 1f1bb30..0b901be 100644 --- ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out @@ -44,17 +44,20 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0) diff --git ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index 2c7c57a..4d3e41a 100644 --- ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out 
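The vectorization_pushdown.q.out hunk above is the clearest summary of what this patch changes at runtime: a reducer that previously bailed out with "notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported" now reports "Execution mode: vectorized". The sketch below is a minimal, hypothetical illustration (the class name AvgFinalSketch and its methods are invented for this note; it is not the generated VectorUDAFAvgFinal code) of the arithmetic a FINAL-mode average performs once the struct-typed partials can be consumed: merge (count, sum) pairs, then emit a primitive double, which is why groupByVectorOutput flips to true in these plans.

// Hypothetical sketch of FINAL-mode AVG semantics; not the generated
// VectorUDAFAvgFinal class produced by the templates in this patch.
public class AvgFinalSketch {
    private long count;   // running count merged from struct partials
    private double sum;   // running sum merged from struct partials

    // Merge one (count, sum) partial result, e.g. from one map task.
    public void merge(long partialCount, double partialSum) {
        count += partialCount;
        sum += partialSum;
    }

    // The final result is a primitive double (null for an empty group),
    // so downstream vectorized operators no longer see a STRUCT column.
    public Double terminate() {
        return count == 0 ? null : sum / count;
    }

    public static void main(String[] args) {
        AvgFinalSketch avg = new AvgFinalSketch();
        avg.merge(3, 30.0);                  // partial from task 1
        avg.merge(2, 40.0);                  // partial from task 2
        System.out.println(avg.terminate()); // prints 14.0
    }
}

The same pattern accounts for every "VectorUDAF*Final(col N) -> double" aggregator that appears in the MERGEPARTIAL and GLOBAL Group By Vectorization sections of the vectorization_short_regress.q.out plans that follow.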
@@ -113,46 +113,70 @@ STAGE PLANS: Group By Operator aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint + aggregators: VectorUDAFAvgLong(col 2) -> struct, VectorUDAFSumDouble(col 5) -> double, VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFCount(col 1) -> bigint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), 
stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFSumDouble(col 1) -> double, VectorUDAFStdPopFinal(col 2) -> double, VectorUDAFStdSampFinal(col 3) -> double, VectorUDAFVarSampFinal(col 4) -> double, VectorUDAFAvgFinal(col 5) -> double, VectorUDAFStdSampFinal(col 6) -> double, VectorUDAFMinLong(col 7) -> tinyint, VectorUDAFCountMerge(col 8) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), (_col0 + -3728.0) (type: double), (- (_col0 + -3728.0)) (type: double), (- (- (_col0 + -3728.0))) (type: double), ((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0))) * (_col0 + -3728.0)) * (- (- (_col0 + -3728.0)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175 - _col4) (type: double), (- (10.175 - _col4)) (type: double), ((- _col2) / -563.0) (type: double), _col6 (type: double), (- ((- _col2) / -563.0)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0)) (type: double), (- (_col0 / _col1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 9, 11, 10, 14, 1, 12, 2, 15, 3, 13, 17, 16, 4, 5, 18, 20, 21, 6, 19, 22, 7, 8, 24, 25] + selectExpressions: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 9:double, DoubleColUnaryMinus(col 10)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 10:double) -> 11:double, DoubleColUnaryMinus(col 12)(children: DoubleColUnaryMinus(col 10)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 10:double) -> 12:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 12, col 13)(children: DoubleColUnaryMinus(col 13)(children: DoubleColUnaryMinus(col 12)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 12:double) -> 13:double) -> 12:double, DoubleColAddDoubleScalar(col 0, val -3728.0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 0) -> 12:double, DoubleColMultiplyDoubleColumn(col 16, col 13)(children: DoubleColMultiplyDoubleColumn(col 13, col 15)(children: DoubleColUnaryMinus(col 15)(children: DoubleColUnaryMinus(col 13)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 13:double) -> 15:double) -> 13:double, DoubleColAddDoubleScalar(col 0, val -3728.0) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 15)(children: DoubleColUnaryMinus(col 13)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 13:double) -> 15:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 2) -> 13:double, DoubleColSubtractDoubleColumn(col 2, col 16)(children: 
DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 16)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 18, col 2)(children: DoubleColSubtractDoubleColumn(col 2, col 16)(children: DoubleColUnaryMinus(col 18)(children: DoubleColUnaryMinus(col 16)(children: DoubleColAddDoubleScalar(col 0, val -3728.0) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 4) -> 18:double, DoubleColUnaryMinus(col 19)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 4) -> 19:double) -> 20:double, DoubleColDivideDoubleScalar(col 19, val -563.0)(children: DoubleColUnaryMinus(col 2) -> 19:double) -> 21:double, DoubleColUnaryMinus(col 22)(children: DoubleColDivideDoubleScalar(col 19, val -563.0)(children: DoubleColUnaryMinus(col 2) -> 19:double) -> 22:double) -> 19:double, DoubleColDivideDoubleColumn(col 0, col 1) -> 22:double, DoubleColDivideDoubleColumn(col 23, col 25)(children: CastLongToDouble(col 7) -> 23:double, DoubleColDivideDoubleScalar(col 24, val -563.0)(children: DoubleColUnaryMinus(col 2) -> 24:double) -> 25:double) -> 24:double, DoubleColUnaryMinus(col 23)(children: DoubleColDivideDoubleColumn(col 0, col 1) -> 23:double) -> 25:double Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -351,46 +375,70 @@ STAGE PLANS: Group By Operator aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct + aggregators: VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMinLong(col 2) -> int, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFVarSampLong(col 2) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + 
Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col1] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFVarPopFinal(col 1) -> double, VectorUDAFStdPopFinal(col 2) -> double, VectorUDAFMaxDouble(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFMinLong(col 5) -> int, VectorUDAFMinDouble(col 6) -> double, VectorUDAFStdSampFinal(col 7) -> double, VectorUDAFVarSampFinal(col 8) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 10, 11, 1, 13, 2, 14, 9, 15, 3, 4, 16, 5, 19, 17, 6, 18, 7, 20, 12, 
21, 23, 8] + selectExpressions: DoubleColDivideDoubleScalar(col 9, val -3728.0)(children: CastLongToDouble(col 0) -> 9:double) -> 10:double, LongColMultiplyLongScalar(col 0, val -3728) -> 11:long, LongColUnaryMinus(col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 13:long, LongScalarModuloLongColumn(val -563, col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 14:long, DoubleColDivideDoubleColumn(col 1, col 2) -> 9:double, DoubleColUnaryMinus(col 2) -> 15:double, DoubleColSubtractDoubleScalar(col 2, val 10.175) -> 16:double, DoubleColModuloDoubleColumn(col 17, col 18)(children: CastLongToDouble(col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 17:double, DoubleColSubtractDoubleScalar(col 2, val 10.175) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 3) -> 17:double, DoubleColModuloDoubleScalar(col 3, val -26.28) -> 18:double, DoubleColUnaryMinus(col 21)(children: DoubleColDivideDoubleScalar(col 20, val -3728.0)(children: CastLongToDouble(col 0) -> 20:double) -> 21:double) -> 20:double, LongColModuloLongColumn(col 22, col 23)(children: LongColUnaryMinus(col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 22:long, LongScalarModuloLongColumn(val -563, col 12)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 12:long) -> 23:long) -> 12:long, DoubleColSubtractDoubleColumn(col 24, col 4)(children: DoubleColDivideDoubleScalar(col 21, val -3728.0)(children: CastLongToDouble(col 0) -> 21:double) -> 24:double) -> 21:double, LongColUnaryMinus(col 22)(children: LongColMultiplyLongScalar(col 0, val -3728) -> 22:long) -> 23:long Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -473,7 +521,7 @@ WHERE (((cbigint <= 197) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16 +-20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 -58 5454.512308361625 1626869520 7.2647256545687792E16 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), @@ -581,46 +629,70 @@ STAGE PLANS: Group By Operator aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) Group By Vectorization: - aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct + aggregators: VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFMaxLong(col 2) -> int, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFCount(col 0) -> bigint, VectorUDAFAvgLong(col 0) -> struct 
className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] - vectorOutputConditionsNotMet: Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF var_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) + Group By Vectorization: + aggregators: VectorUDAFVarPopFinal(col 0) -> double, VectorUDAFCountMerge(col 1) -> bigint, VectorUDAFMaxLong(col 2) -> tinyint, VectorUDAFStdPopFinal(col 3) -> double, VectorUDAFMaxLong(col 4) -> int, VectorUDAFStdSampFinal(col 5) -> double, VectorUDAFCountMerge(col 6) -> bigint, VectorUDAFAvgFinal(col 7) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0 % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: 
double), (- (- (- _col0))) (type: double), (762 * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762 * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728 % (UDFToLong(_col2) + (762 * (- _col1)))) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 8, 10, 1, 12, 2, 14, 13, 15, 1, 16, 3, 9, 19, 4, 18, 22, 5, 23, 6, 7, 24] + selectExpressions: DoubleColUnaryMinus(col 0) -> 8:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 10:double, DecimalColModuloDecimalScalar(col 11, val 79.553)(children: CastLongToDecimal(col 1) -> 11:decimal(19,0)) -> 12:decimal(5,3), DoubleColSubtractDoubleColumn(col 9, col 13)(children: CastLongToDouble(col 1) -> 9:double, DoubleColUnaryMinus(col 0) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 13:double, DoubleScalarModuloDoubleColumn(val -1.0, col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 15:double, LongColUnaryMinus(col 1) -> 16:long, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 9)(children: DoubleColUnaryMinus(col 0) -> 9:double) -> 17:double) -> 9:double, LongScalarMultiplyLongColumn(val 762, col 18)(children: LongColUnaryMinus(col 1) -> 18:long) -> 19:long, LongColAddLongColumn(col 2, col 20)(children: col 2, LongScalarMultiplyLongColumn(val 762, col 18)(children: LongColUnaryMinus(col 1) -> 18:long) -> 20:long) -> 18:long, DoubleColAddDoubleColumn(col 17, col 21)(children: DoubleColUnaryMinus(col 0) -> 17:double, CastLongToDouble(col 4) -> 21:double) -> 22:double, LongColModuloLongColumn(col 20, col 1)(children: LongColUnaryMinus(col 1) -> 20:long) -> 23:long, LongScalarModuloLongColumn(val -3728, col 20)(children: LongColAddLongColumn(col 2, col 24)(children: col 2, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColUnaryMinus(col 1) -> 20:long) -> 24:long) -> 20:long) -> 24:long Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -790,46 +862,70 @@ STAGE PLANS: Group By Operator aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float + aggregators: VectorUDAFAvgLong(col 0) -> struct, VectorUDAFMaxLong(col 3) -> bigint, VectorUDAFStdSampLong(col 2) -> struct, VectorUDAFVarPopLong(col 2) -> struct, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFMaxDouble(col 4) -> float className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of 
VectorUDAFAvgLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFMaxLong(col 1) -> bigint, VectorUDAFStdSampFinal(col 2) -> double, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFVarPopFinal(col 4) -> double, VectorUDAFMaxDouble(col 5) -> float + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), (_col0 + 6981.0) (type: double), ((_col0 + 6981.0) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0) + _col0) / _col0) (type: double), (- (_col0 + 6981.0)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 6, 8, 1, 7, 10, 2, 9, 3, 4, 12, 14, 5, 11] + selectExpressions: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 6:double, 
DoubleColAddDoubleColumn(col 7, col 0)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 7:double) -> 8:double, DoubleColDivideDoubleColumn(col 9, col 0)(children: DoubleColAddDoubleColumn(col 7, col 0)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 7:double) -> 9:double) -> 7:double, DoubleColUnaryMinus(col 9)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 9:double) -> 10:double, DoubleColModuloDoubleColumn(col 0, col 11)(children: DoubleColUnaryMinus(col 9)(children: DoubleColAddDoubleScalar(col 0, val 6981.0) -> 9:double) -> 11:double) -> 9:double, LongColUnaryMinus(col 1) -> 12:long, DoubleColDivideDoubleColumn(col 11, col 2)(children: CastLongToDouble(col 13)(children: LongColUnaryMinus(col 1) -> 13:long) -> 11:double) -> 14:double, DoubleColMultiplyDoubleScalar(col 4, val -26.28) -> 11:double Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2098,11 +2194,12 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] - vectorOutputConditionsNotMet: Vector output of VectorUDAFStdSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false keys: csmallint (type: smallint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -2111,26 +2208,42 @@ STAGE PLANS: key expressions: _col0 (type: smallint) sort order: + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF stddev_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) + 
Group By Vectorization: + aggregators: VectorUDAFStdSampFinal(col 1) -> double, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFVarPopFinal(col 3) -> double, VectorUDAFCountMerge(col 4) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3] keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -2138,10 +2251,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13] + selectExpressions: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 5:long, DecimalScalarDivideDecimalColumn(val -1.389, col 6)(children: CastLongToDecimal(col 0) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9, col 10)(children: CastLongToDouble(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 9:double, CastLongToDouble(col 2) -> 10:double) -> 11:double, LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 12:long, LongColUnaryMinus(col 13)(children: LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 13:long) -> 8:long, LongColSubtractLongScalar(col 4, val -89010) -> 13:long Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) sort order: +++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -2350,11 +2472,12 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFVarSampDouble(col 5) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFSumDouble(col 5) -> double className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 5) -> 
struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false keys: cdouble (type: double) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -2363,26 +2486,42 @@ STAGE PLANS: key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported - vectorized: false + groupByVectorOutput: true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) + Group By Vectorization: + aggregators: VectorUDAFVarSampFinal(col 1) -> double, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFSumDouble(col 3) -> double, VectorUDAFVarPopFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double, VectorUDAFSumDouble(col 6) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumns: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: double) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -2390,10 +2529,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15] + selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 7:double, DoubleColUnaryMinus(col 1) -> 8:double, DoubleColAddDoubleScalar(col 9, val -5638.15)(children: 
DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9, col 12)(children: DoubleColUnaryMinus(col 1) -> 9:double, DoubleColAddDoubleScalar(col 11, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 1) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0, col 1) -> 9:double, DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14)(children: DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 14:double) -> 15:double Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 @@ -2644,13 +2792,14 @@ STAGE PLANS: Group By Operator aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) Group By Vectorization: - aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint + aggregators: VectorUDAFStdPopLong(col 2) -> struct, VectorUDAFAvgLong(col 1) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFVarSampLong(col 1) -> struct, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFVarSampDouble(col 4) -> struct, VectorUDAFAvgDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFVarPopLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFSumLong(col 2) -> bigint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 8, col 6 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopDouble(col 
4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
 keys: ctimestamp1 (type: timestamp), cstring1 (type: string)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2659,26 +2808,42 @@ STAGE PLANS:
 key expressions: _col0 (type: timestamp), _col1 (type: string)
 sort order: ++
 Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: string)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint)
 Execution mode: vectorized
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: false
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 allNative: false
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
+ Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF stddev_pop parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
- vectorized: false
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
 Reduce Operator Tree:
 Group By Operator
 aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12)
+ Group By Vectorization:
+ aggregators: VectorUDAFStdPopFinal(col 2) -> double, VectorUDAFAvgFinal(col 3) -> double, VectorUDAFCountMerge(col 4) -> bigint, VectorUDAFMinLong(col 5) -> tinyint, VectorUDAFVarSampFinal(col 6) -> double, VectorUDAFVarPopFinal(col 7) -> double, VectorUDAFAvgFinal(col 8) -> double, VectorUDAFVarSampFinal(col 9) -> double, VectorUDAFAvgFinal(col 10) -> double, VectorUDAFMinDouble(col 11) -> double, VectorUDAFVarPopFinal(col 12) -> double, VectorUDAFStdPopFinal(col 13) -> double, VectorUDAFSumLong(col 14) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ vectorOutput: true
+ keyExpressions: col 0, col 1
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
 keys: KEY._col0 (type: timestamp), KEY._col1 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2686,10 +2851,19 @@ STAGE PLANS:
 Select Operator
 expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 2, 15, 16, 3, 17, 18, 4, 19, 22, 5, 21, 23, 6, 20, 26, 27, 7, 25, 8, 9, 29, 28, 10, 30, 32, 24, 11, 12, 31, 34, 37, 13, 14, 38, 40, 4, 39]
+ selectExpressions: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 15:double, DoubleColUnaryMinus(col 2) -> 16:double, DoubleColUnaryMinus(col 2) -> 17:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 18:double, LongColUnaryMinus(col 4) -> 19:long, DoubleColMultiplyDoubleColumn(col 20, col 21)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 23, col 20)(children: DoubleColMultiplyDoubleColumn(col 20, col 21)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 21:double) -> 23:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 20)(children: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 20:double) -> 23:double, DoubleColAddDoubleColumn(col 6, col 25)(children: DoubleColMultiplyDoubleColumn(col 26, col 20)(children: DoubleColMultiplyDoubleColumn(col 20, col 25)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 25:double) -> 26:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25)(children: DoubleColUnaryMinus(col 2) -> 25:double) -> 26:double, DoubleColDivideDoubleColumn(col 25, col 2)(children: CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 25:double) -> 27:double, DoubleScalarDivideDoubleColumn(val 10.175, col 3) -> 25:double, DoubleColSubtractDoubleColumn(col 28, col 30)(children: DoubleColAddDoubleColumn(col 6, col 29)(children: DoubleColMultiplyDoubleColumn(col 30, col 28)(children: DoubleColMultiplyDoubleColumn(col 28, col 29)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 28:double, DoubleColUnaryMinus(col 2) -> 29:double) -> 30:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 28:double) -> 29:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 31, col 29)(children: DoubleColMultiplyDoubleColumn(col 29, col 30)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 29:double, DoubleColUnaryMinus(col 2) -> 30:double) -> 31:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 29:double) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 30)(children: DoubleColUnaryMinus(col 28)(children: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 28:double) -> 30:double) -> 28:double, DoubleColMultiplyDoubleScalar(col 31, val 10.175)(children: DoubleColSubtractDoubleColumn(col 30, col 32)(children: DoubleColAddDoubleColumn(col 6, col 31)(children: DoubleColMultiplyDoubleColumn(col 32, col 30)(children: DoubleColMultiplyDoubleColumn(col 30, col 31)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 30:double, DoubleColUnaryMinus(col 2) -> 31:double) -> 32:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 30:double) -> 31:double) -> 30:double, DoubleColMultiplyDoubleColumn(col 33, col 31)(children: DoubleColMultiplyDoubleColumn(col 31, col 32)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 31:double, DoubleColUnaryMinus(col 2) -> 32:double) -> 33:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 31:double) -> 32:double) -> 31:double) -> 30:double, DoubleScalarModuloDoubleColumn(val 10.175, col 31)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 3) -> 31:double) -> 32:double, LongColUnaryMinus(col 5) -> 24:long, DoubleColUnaryMinus(col 34)(children: DoubleColMultiplyDoubleColumn(col 31, col 33)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 31:double, DoubleColUnaryMinus(col 2) -> 33:double) -> 34:double) -> 31:double, DoubleColModuloDoubleColumn(col 33, col 10)(children: DoubleColUnaryMinus(col 2) -> 33:double) -> 34:double, DecimalScalarDivideDecimalColumn(val -26.28, col 36)(children: CastLongToDecimal(col 35)(children: LongColUnaryMinus(col 5) -> 35:long) -> 36:decimal(3,0)) -> 37:decimal(8,6), DoubleColDivideDoubleColumn(col 33, col 7)(children: DoubleColAddDoubleColumn(col 6, col 38)(children: DoubleColMultiplyDoubleColumn(col 39, col 33)(children: DoubleColMultiplyDoubleColumn(col 33, col 38)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 33:double, DoubleColUnaryMinus(col 2) -> 38:double) -> 39:double, CastLongToDouble(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 33:double) -> 38:double) -> 33:double) -> 38:double, LongColUnaryMinus(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 40:long, DoubleColModuloDoubleScalar(col 33, val -26.28)(children: DoubleColAddDoubleColumn(col 6, col 39)(children: DoubleColMultiplyDoubleColumn(col 41, col 33)(children: DoubleColMultiplyDoubleColumn(col 33, col 39)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 33:double, DoubleColUnaryMinus(col 2) -> 39:double) -> 41:double, CastLongToDouble(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 33:double) -> 39:double) -> 33:double) -> 39:double
 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double)
 sort order: +++++++++++++++++++++++++++++++++++++++
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
 TopN Hash Memory Usage: 0.1
 Reducer 3
@@ -3021,13 +3195,14 @@ STAGE PLANS:
 Group By Operator
 aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint)
 Group By Vectorization:
- aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct
+ aggregators: VectorUDAFMaxDouble(col 4) -> float, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFMinLong(col 3) -> bigint, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFStdPopLong(col 1) -> struct, VectorUDAFAvgLong(col 2) -> struct
 className: VectorGroupByOperator
- vectorOutput: false
+ groupByMode: HASH
+ vectorOutput: true
 keyExpressions: col 10
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
- vectorOutputConditionsNotMet: Vector output of VectorUDAFVarSampLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(col 5) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopLong(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false
 keys: cboolean1 (type: boolean)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -3036,26 +3211,42 @@ STAGE PLANS:
 key expressions: _col0 (type: boolean)
 sort order: +
 Map-reduce partition columns: _col0 (type: boolean)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct)
 Execution mode: vectorized
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: false
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 allNative: false
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
+ Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF var_samp parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col2] not supported
- vectorized: false
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
 Reduce Operator Tree:
 Group By Operator
 aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9)
+ Group By Vectorization:
+ aggregators: VectorUDAFMaxDouble(col 1) -> float, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFVarSampFinal(col 3) -> double, VectorUDAFAvgFinal(col 4) -> double, VectorUDAFMinLong(col 5) -> bigint, VectorUDAFVarPopFinal(col 6) -> double, VectorUDAFSumLong(col 7) -> bigint, VectorUDAFStdSampFinal(col 8) -> double, VectorUDAFStdPopFinal(col 9) -> double, VectorUDAFAvgFinal(col 10) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 keys: KEY._col0 (type: boolean)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -3063,10 +3254,19 @@ STAGE PLANS:
 Select Operator
 expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0, 1, 11, 12, 2, 14, 3, 15, 17, 4, 19, 5, 6, 16, 20, 22, 7, 8, 23, 26, 9, 28, 10, 21, 30]
+ selectExpressions: DoubleColUnaryMinus(col 1) -> 11:double, DoubleScalarDivideDoubleColumn(val -26.28, col 1)(children: col 1) -> 12:double, DecimalColSubtractDecimalScalar(col 13, val 10.175)(children: CastLongToDecimal(col 2) -> 13:decimal(19,0)) -> 14:decimal(23,3), DoubleColModuloDoubleColumn(col 3, col 1)(children: col 1) -> 15:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16)(children: DoubleColUnaryMinus(col 1) -> 16:double) -> 17:double, DoubleColAddDoubleColumn(col 16, col 3)(children: CastDecimalToDouble(col 18)(children: DecimalColSubtractDecimalScalar(col 13, val 10.175)(children: CastLongToDecimal(col 2) -> 13:decimal(19,0)) -> 18:decimal(23,3)) -> 16:double) -> 19:double, DoubleColUnaryMinus(col 20)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16)(children: DoubleColUnaryMinus(col 1) -> 16:double) -> 20:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 79.553, col 6) -> 20:double, DoubleColModuloDoubleColumn(col 3, col 21)(children: DoubleScalarDivideDoubleColumn(val 79.553, col 6) -> 21:double) -> 22:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 13)(children: CastLongToDecimal(col 5) -> 13:decimal(19,0)) -> 23:decimal(24,3), DecimalColSubtractDecimalColumn(col 13, col 25)(children: CastLongToDecimal(col 7) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24)(children: CastLongToDecimal(col 5) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 26:decimal(25,3), FuncNegateDecimalToDecimal(col 27)(children: DecimalColSubtractDecimalColumn(col 13, col 25)(children: CastLongToDecimal(col 7) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24)(children: CastLongToDecimal(col 5) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 27:decimal(25,3)) -> 28:decimal(25,3), DoubleColUnaryMinus(col 10) -> 21:double, DoubleColMultiplyDoubleColumn(col 10, col 29)(children: CastLongToDouble(col 7) -> 29:double) -> 30:double
 Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: boolean)
 sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double)
 Reducer 3
@@ -3238,8 +3438,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountStar(*) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -3276,8 +3478,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -3350,8 +3554,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -3388,8 +3594,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -3534,8 +3742,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountStar(*) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -3572,8 +3782,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -3646,8 +3858,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -3684,8 +3898,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -3758,8 +3974,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 2) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -3796,8 +4014,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -3870,8 +4090,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 4) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -3908,8 +4130,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -3982,8 +4206,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 6) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -4020,8 +4246,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -4094,8 +4322,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 10) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
@@ -4132,8 +4362,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCountMerge(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
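[Editor's note — illustrative sketch, not part of the patch. The hunks above swap reducers that previously reported notVectorizedReason "Data type struct ... not supported" for final-mode aggregators such as VectorUDAFAvgFinal(col 3) -> double: the map side still emits a struct-typed AVG partial, but the reduce side now merges those partials and emits a primitive double, which is why groupByVectorOutput flips to true. A minimal Java sketch of the merge-and-finish idea, assuming a simplified two-field partial; the class and method names here are hypothetical, not Hive's generated code:]

  // Simplified stand-in for the struct-typed AVG partial result (count, sum).
  static final class AvgPartial {
    long count;
    double sum;

    // Merge another partial into this one (what a MERGEPARTIAL group-by does per key).
    void merge(AvgPartial other) {
      this.count += other.count;
      this.sum += other.sum;
    }

    // FINAL step: emit a primitive double instead of the struct, so the
    // downstream vectorized operators only ever see a primitive column.
    Double finish() {
      return count == 0 ? null : sum / count;  // assumption: an empty group yields NULL
    }
  }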
diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index ffac02c..66dcdad 100644
--- ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -287,8 +287,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1]
 mode: hash
 outputColumnNames: _col0, _col1
@@ -325,8 +327,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0, 1]
 mode: mergepartial
 outputColumnNames: _col0, _col1
@@ -412,8 +416,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1]
 mode: hash
 outputColumnNames: _col0, _col1
@@ -450,8 +456,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFSumLong(col 1) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0, 1]
 mode: mergepartial
 outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
index 030a71b..9fd16c9 100644
--- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
+++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
@@ -117,24 +117,29 @@ STAGE PLANS:
 Group By Operator
 aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
 Group By Vectorization:
- aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct
+ aggregators: VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgLong(col 12) -> struct
 className: VectorGroupByOperator
- vectorOutput: false
+ groupByMode: HASH
+ vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1, 2, 3]
- vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 12) -> struct output type STRUCT requires PRIMITIVE IS false
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
 Execution mode: vectorized
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: false
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 allNative: false
 usesVectorUDFAdaptor: false
@@ -142,19 +147,33 @@ STAGE PLANS:
 Local Work:
 Map Reduce Local Work
 Reducer 2
+ Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported
- vectorized: false
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgFinal(col 3) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ vectorOutput: true
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumns: [0, 1, 2, 3]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
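[Editor's note — the golden outputs in this patch consistently pair the plan's groupByMode with the new vectorProcessingMode annotation: HASH -> HASH, MERGEPARTIAL with keyExpressions -> MERGE_PARTIAL, MERGEPARTIAL without keys -> GLOBAL, and COMPLETE -> STREAMING (see vectorization_limit.q.out further down). A hypothetical Java condensation of that apparent mapping, inferred from these .q.out files rather than taken from the Vectorizer source:]

  enum VectorProcessingMode { HASH, MERGE_PARTIAL, GLOBAL, STREAMING, NONE }

  // Inferred mapping only; the real decision logic in Hive may consider more inputs.
  static VectorProcessingMode processingModeFor(String groupByMode, boolean hasGroupingKeys) {
    switch (groupByMode) {
      case "HASH":         return VectorProcessingMode.HASH;
      case "MERGEPARTIAL": return hasGroupingKeys ? VectorProcessingMode.MERGE_PARTIAL
                                                  : VectorProcessingMode.GLOBAL;  // no keys: one global aggregate
      case "COMPLETE":     return VectorProcessingMode.STREAMING;
      default:             return VectorProcessingMode.NONE;  // row-mode fallback (vectorOutput: false)
    }
  }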
diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
index 2395091..4972677 100644
--- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
+++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out
@@ -3323,9 +3323,11 @@ STAGE PLANS:
 Group By Operator
 Group By Vectorization:
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 keyExpressions: col 0, col 1, col 2
 native: false
+ vectorProcessingMode: MERGE_PARTIAL
 projectedOutputColumns: []
 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
 mode: mergepartial
@@ -3461,9 +3463,11 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumDouble(col 7) -> double
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 keyExpressions: col 2, col 3
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 keys: p_mfgr (type: string), p_brand (type: string)
 mode: hash
diff --git ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
index b6e7519..18c7db1 100644
--- ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
+++ ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
@@ -134,20 +134,35 @@ STAGE PLANS:
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
 Reducer 3
+ Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col3] not supported
- vectorized: false
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 2) -> int, VectorUDAFAvgFinal(col 3) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ vectorOutput: true
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumns: [0, 1, 2, 3]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 key expressions: _col0 (type: bigint)
 sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double)
 Reducer 4
diff --git ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
index 864b82f..a992f41 100644
--- ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out
@@ -800,8 +800,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1, 2, 3]
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -838,8 +840,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 1) -> timestamp, VectorUDAFCountMerge(col 2) -> bigint, VectorUDAFCountMerge(col 3) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0, 1, 2, 3]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -908,25 +912,47 @@ STAGE PLANS:
 TableScan
 alias: alltypesorc_string
 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1]
 Select Operator
 expressions: ctimestamp1 (type: timestamp)
 outputColumnNames: ctimestamp1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [0]
 Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
 Group By Operator
 aggregations: sum(ctimestamp1)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumTimestamp(col 0) -> double
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0]
 mode: hash
 outputColumnNames: _col0
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: double)
+ Execution mode: vectorized
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1)
- vectorized: false
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
 Reducer 2
 Execution mode: vectorized
 Reduce Vectorization:
@@ -942,8 +968,10 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumDouble(col 0) -> double
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 native: false
+ vectorProcessingMode: GLOBAL
 projectedOutputColumns: [0]
 mode: mergepartial
 outputColumnNames: _col0
@@ -1043,46 +1071,70 @@ STAGE PLANS:
 Group By Operator
 aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
 Group By Vectorization:
- aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
+ aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
 className: VectorGroupByOperator
- vectorOutput: false
+ groupByMode: HASH
+ vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
- vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
 Execution mode: vectorized
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: false
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 allNative: false
 usesVectorUDFAdaptor: false
 vectorized: true
 Reducer 2
+ Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
- vectorized: false
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: true
+ vectorized: true
 Reduce Operator Tree:
 Group By Operator
 aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgFinal(col 0) -> double, VectorUDAFVarPopFinal(col 1) -> double, VectorUDAFVarPopFinal(col 2) -> double, VectorUDAFVarSampFinal(col 3) -> double, VectorUDAFStdPopFinal(col 4) -> double, VectorUDAFStdPopFinal(col 5) -> double, VectorUDAFStdPopFinal(col 6) -> double, VectorUDAFStdSampFinal(col 7) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ vectorOutput: true
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
 Select Operator
 expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [8, 9, 10, 11, 12, 13, 14, 15]
+ selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 0) -> 8:double, VectorUDFAdaptor(_col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 9:boolean, VectorUDFAdaptor(_col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19) -> 10:boolean, VectorUDFAdaptor(_col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 3) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 6, decimalPlaces 3) -> 14:double, RoundWithNumDigitsDoubleToDouble(col 7, decimalPlaces 3) -> 15:double
 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
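[Editor's note — the vectorized_timestamp_funcs.q.out plan above shows usesVectorUDFAdaptor: true, with expressions such as VectorUDFAdaptor(_col1 BETWEEN ...) in the Select. The adaptor is the escape hatch that keeps a batch pipeline vectorized when no native VectorExpression exists: it walks the batch and evaluates the ordinary row-mode UDF one row at a time. A rough Java sketch of that loop; only the VectorizedRowBatch class and the VectorUDFAdaptor name come from the plan output, the rest is illustrative:]

  // Illustrative only: evaluate a row-mode UDF over a vectorized batch.
  void evaluate(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch batch) {
    for (int j = 0; j < batch.size; j++) {
      // Respect the selection vector if the batch is filtered.
      int row = batch.selectedInUse ? batch.selected[j] : j;
      // 1. materialize the input columns for 'row' into writables,
      // 2. call the wrapped row-mode UDF exactly as non-vectorized execution would,
      // 3. write the result back into the output ColumnVector at 'row'.
    }
  }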
diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index efc2489..afcae8c 100644
--- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -258,13 +258,14 @@ STAGE PLANS:
 Group By Operator
 aggregations: avg(_col1)
 Group By Vectorization:
- aggregators: VectorUDAFAvgDouble(col 12) -> struct
+ aggregators: VectorUDAFAvgDouble(col 12) -> struct
 className: VectorGroupByOperator
- vectorOutput: false
+ groupByMode: HASH
+ vectorOutput: true
 keyExpressions: col 0
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
- vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false
 keys: _col0 (type: tinyint)
 mode: hash
 outputColumnNames: _col0, _col1
@@ -273,6 +274,13 @@ STAGE PLANS:
 key expressions: _col0 (type: tinyint)
 sort order: +
 Map-reduce partition columns: _col0 (type: tinyint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: [0]
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ partitionColumns: [0]
+ valueColumns: [1]
 Statistics: Num rows: 95 Data size: 7888 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.3
 value expressions: _col1 (type: struct)
@@ -280,7 +288,7 @@ STAGE PLANS:
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: false
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 allNative: false
 usesVectorUDFAdaptor: false
@@ -292,23 +300,47 @@ STAGE PLANS:
 partitionColumnCount: 0
 scratchColumnTypeNames: double
 Reducer 2
+ Execution mode: vectorized
 Reduce Vectorization:
 enabled: true
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: Aggregation Function UDF avg parameter expression for GROUPBY operator: Data type struct of Column[VALUE._col0] not supported
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ groupByVectorOutput: true
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ dataColumns: KEY._col0:tinyint, VALUE._col0:struct
+ partitionColumnCount: 0
 Reduce Operator Tree:
 Group By Operator
 aggregations: avg(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFAvgFinal(col 1) -> double
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ vectorOutput: true
+ keyExpressions: col 0
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumns: [0]
 keys: KEY._col0 (type: tinyint)
 mode: mergepartial
 outputColumnNames: _col0, _col1
 Statistics: Num rows: 95 Data size: 1048 Basic stats: COMPLETE Column stats: COMPLETE
 Limit
 Number of rows: 20
+ Limit Vectorization:
+ className: VectorLimitOperator
+ native: true
 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -390,9 +422,11 @@ STAGE PLANS:
 Group By Operator
 Group By Vectorization:
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 keyExpressions: col 0
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: []
 keys: ctinyint (type: tinyint)
 mode: hash
@@ -443,9 +477,11 @@ STAGE PLANS:
 Group By Operator
 Group By Vectorization:
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 keyExpressions: col 0
 native: false
+ vectorProcessingMode: MERGE_PARTIAL
 projectedOutputColumns: []
 keys: KEY._col0 (type: tinyint)
 mode: mergepartial
@@ -543,9 +579,11 @@ STAGE PLANS:
 Group By Operator
 Group By Vectorization:
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 keyExpressions: col 0, col 5
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: []
 keys: ctinyint (type: tinyint), cdouble (type: double)
 mode: hash
@@ -596,9 +634,11 @@ STAGE PLANS:
 Group By Operator
 Group By Vectorization:
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 keyExpressions: col 0, col 1
 native: false
+ vectorProcessingMode: MERGE_PARTIAL
 projectedOutputColumns: []
 keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
 mode: mergepartial
@@ -609,9 +649,11 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFCount(col 1) -> bigint
 className: VectorGroupByOperator
+ groupByMode: COMPLETE
 vectorOutput: true
 keyExpressions: col 0
 native: false
+ vectorProcessingMode: STREAMING
 projectedOutputColumns: [0]
 keys: _col0 (type: tinyint)
 mode: complete
@@ -739,9 +781,11 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumLong(col 0) -> bigint
 className: VectorGroupByOperator
+ groupByMode: HASH
 vectorOutput: true
 keyExpressions: col 5
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0]
 keys: cdouble (type: double)
 mode: hash
@@ -794,9 +838,11 @@ STAGE PLANS:
 Group By Vectorization:
 aggregators: VectorUDAFSumLong(col 1) -> bigint
 className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
 vectorOutput: true
 keyExpressions: col 0
 native: false
+ vectorProcessingMode: MERGE_PARTIAL
 projectedOutputColumns: [0]
 keys: KEY._col0 (type: double)
 mode: mergepartial
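[Editor's note — the tez plan above satisfies every native Reduce Sink condition, while the MR-based vector_aggregate_9.q.out below reports nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false and falls back to the non-native VectorReduceSinkOperator. A hypothetical Java condensation of the checklist these plans print; the real checks live in Hive's Vectorizer and the parameter names here are illustrative:]

  // Mirrors the nativeConditionsMet / nativeConditionsNotMet lines in the plans.
  static boolean reduceSinkIsNative(String engine, boolean newReduceSinkEnabled,
      boolean hasPtfTopN, boolean hasDistinctColumns,
      boolean binarySortableKeys, boolean lazyBinaryValues) {
    return newReduceSinkEnabled                            // hive.vectorized.execution.reducesink.new.enabled
        && ("tez".equals(engine) || "spark".equals(engine)) // "mr" fails exactly this test
        && !hasPtfTopN                                     // No PTF TopN
        && !hasDistinctColumns                             // No DISTINCT columns
        && binarySortableKeys                              // BinarySortableSerDe for keys
        && lazyBinaryValues;                               // LazyBinarySerDe for values
  }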
diff --git ql/src/test/results/clientpositive/vector_aggregate_9.q.out ql/src/test/results/clientpositive/vector_aggregate_9.q.out
index 3ad29ef..0f4855c 100644
--- ql/src/test/results/clientpositive/vector_aggregate_9.q.out
+++ ql/src/test/results/clientpositive/vector_aggregate_9.q.out
@@ -101,10 +101,10 @@ POSTHOOK: Lineage: vectortab2korc.si SIMPLE [(vectortab2k)vectortab2k.FieldSchem
 POSTHOOK: Lineage: vectortab2korc.t SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:t, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts, type:timestamp, comment:null), ]
 POSTHOOK: Lineage: vectortab2korc.ts2 SIMPLE [(vectortab2k)vectortab2k.FieldSchema(name:ts2, type:timestamp, comment:null), ]
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select min(dc), max(dc), sum(dc), avg(dc) from vectortab2korc
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -136,28 +136,39 @@ STAGE PLANS:
 Group By Operator
 aggregations: min(dc), max(dc), sum(dc), avg(dc)
 Group By Vectorization:
- aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct
+ aggregators: VectorUDAFMinDecimal(col 6) -> decimal(38,18), VectorUDAFMaxDecimal(col 6) -> decimal(38,18), VectorUDAFSumDecimal(col 6) -> decimal(38,18), VectorUDAFAvgDecimal(col 6) -> struct
 className: VectorGroupByOperator
- vectorOutput: false
+ groupByMode: HASH
+ vectorOutput: true
 native: false
+ vectorProcessingMode: HASH
 projectedOutputColumns: [0, 1, 2, 3]
- vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 6) -> struct output type STRUCT requires PRIMITIVE IS false
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
 Reduce Output Operator
 sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
 Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct)
 Execution mode: vectorized
 Map Vectorization:
 enabled: true
 enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: false
+ groupByVectorOutput: true
 inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 allNative: false
 usesVectorUDFAdaptor: false
 vectorized: true
+ rowBatchContext:
+ dataColumnCount: 13
+ includeColumns: [6]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date
+ partitionColumnCount: 0
 Reduce Vectorization:
 enabled: false
 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -166,8 +177,10 @@ STAGE PLANS:
 Group By Operator
 aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
 Group By Vectorization:
+ groupByMode: MERGEPARTIAL
 vectorOutput: false
 native: false
+ vectorProcessingMode: NONE
 projectedOutputColumns: null
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
@@ -195,3 +208,217 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@vectortab2korc
 #### A masked pattern was here ####
 -4997414117561.546875000000000000 4994550248722.298828000000000000 -10252745435816.024410000000000000 -5399023399.587163986308583465
+PREHOOK: query: explain vectorization detail
+select min(d), max(d), sum(d), avg(d) from vectortab2korc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select min(d), max(d), sum(d), avg(d) from vectortab2korc
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: vectortab2korc
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ Select Operator
+ expressions: d (type: double)
+ outputColumnNames: d
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [5]
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(d), max(d), sum(d), avg(d)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinDouble(col 5) -> double, VectorUDAFMaxDouble(col 5) -> double, VectorUDAFSumDouble(col 5) -> double, VectorUDAFAvgDouble(col 5) -> struct
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0, 1, 2, 3]
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 13
+ includeColumns: [5]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(d), max(d), sum(d), avg(d) from vectortab2korc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab2korc
+#### A masked pattern was here ####
+POSTHOOK: query: select min(d), max(d), sum(d), avg(d) from vectortab2korc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab2korc
+#### A masked pattern was here ####
+-4999829.07 4997627.14 -1.7516847286999977E8 -92193.93308947356
+PREHOOK: query: explain vectorization detail
+select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc
+PREHOOK: type: QUERY
+POSTHOOK: query: explain vectorization detail
+select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: vectortab2korc
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ Select Operator
+ expressions: ts (type: timestamp)
+ outputColumnNames: ts
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumns: [10]
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: min(ts), max(ts), sum(ts), avg(ts)
+ Group By Vectorization:
+ aggregators: VectorUDAFMinTimestamp(col 10) -> timestamp, VectorUDAFMaxTimestamp(col 10) -> timestamp, VectorUDAFSumTimestamp(col 10) -> double, VectorUDAFAvgTimestamp(col 10) -> struct
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ vectorOutput: true
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumns: [0, 1, 2, 3]
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkOperator
+ native: false
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct)
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 13
+ includeColumns: [10]
+ dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date
+ partitionColumnCount: 0
+ Reduce Vectorization:
+ enabled: false
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+ enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ Group By Vectorization:
+ groupByMode: MERGEPARTIAL
+ vectorOutput: false
+ native: false
+ vectorProcessingMode: NONE
+ projectedOutputColumns: null
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@vectortab2korc
+#### A masked pattern was here ####
+POSTHOOK: query: select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@vectortab2korc
+#### A masked pattern was here ####
+2013-02-18 21:06:48 2081-02-22 01:21:53 4.591384881081E12 2.4254542425150557E9
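[Editor's note — the result above (sum(ts) = 4.591384881081E12 and avg(ts) ≈ 2.43E9 for timestamps between 2013 and 2081) shows that SUM/AVG over timestamps are computed in seconds since the epoch and returned as double, matching VectorUDAFSumTimestamp(col 10) -> double in the plan. A small illustrative Java helper consistent with those numbers; this is an assumption-level sketch, not Hive's conversion API:]

  // Convert a timestamp to fractional epoch seconds, the representation the
  // double-typed SUM/AVG results above suggest (milliseconds from getTime(),
  // plus the sub-millisecond part of the nanoseconds field).
  static double toEpochSeconds(java.sql.Timestamp ts) {
    return ts.getTime() / 1000.0 + (ts.getNanos() % 1_000_000) / 1_000_000_000.0;
  }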
double, VectorUDAFAvgTimestamp(col 10) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 13 + includeColumns: [10] + dataColumns: t:tinyint, si:smallint, i:int, b:bigint, f:float, d:double, dc:decimal(38,18), bo:boolean, s:string, s2:string, ts:timestamp, ts2:timestamp, dt:date + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc +PREHOOK: type: QUERY +PREHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +POSTHOOK: query: select min(ts), max(ts), sum(ts), avg(ts) from vectortab2korc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@vectortab2korc +#### A masked pattern was here #### +2013-02-18 21:06:48 2081-02-22 01:21:53 4.591384881081E12 2.4254542425150557E9 diff --git ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out index 3519a87..e234c0a 100644 --- ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/vector_binary_join_groupby.q.out @@ -190,8 +190,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 22) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: 
[0] mode: hash outputColumnNames: _col0 @@ -224,8 +226,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 @@ -342,9 +346,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 10 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: bin (type: binary) mode: hash @@ -378,8 +384,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: binary) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_cast_constant.q.out ql/src/test/results/clientpositive/vector_cast_constant.q.out index 7afdb72..3cd708b 100644 --- ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -139,13 +139,14 @@ STAGE PLANS: Group By Operator aggregations: avg(50), avg(50.0), avg(50) Group By Vectorization: - aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct + aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 11:long) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 12:double) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 13:decimal(10,0)) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -154,6 +155,11 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) @@ -161,7 +167,7 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: 
false @@ -174,8 +180,10 @@ STAGE PLANS: Group By Operator aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out index 03bf436..26dfad1 100644 --- ql/src/test/results/clientpositive/vector_char_2.q.out +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -92,9 +92,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash @@ -129,8 +131,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: char(20)) mode: mergepartial @@ -283,9 +287,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] keys: _col0 (type: char(20)) mode: hash @@ -320,8 +326,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0), count(VALUE._col1) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: char(20)) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_coalesce_2.q.out ql/src/test/results/clientpositive/vector_coalesce_2.q.out index 431cfdc..336ae04 100644 --- ql/src/test/results/clientpositive/vector_coalesce_2.q.out +++ ql/src/test/results/clientpositive/vector_coalesce_2.q.out @@ -48,8 +48,10 @@ STAGE PLANS: Group By Operator aggregations: sum(_col1) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: string) mode: hash @@ -65,8 +67,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial @@ -205,9 +209,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 4) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash @@ -241,8 +247,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_complex_all.q.out ql/src/test/results/clientpositive/vector_complex_all.q.out index e6f0307..7125df8 100644 --- ql/src/test/results/clientpositive/vector_complex_all.q.out +++ 
ql/src/test/results/clientpositive/vector_complex_all.q.out @@ -75,7 +75,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### orc_create_complex.str orc_create_complex.mp orc_create_complex.lst orc_create_complex.strct orc_create_complex.val -line1 {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] {"a":"one","b":"two"} 0 +line1 {"key11":"value11","key12":"value12","key13":"value13"} ["a","b","c"] {"a":"one","b":"two"} 0 line2 {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"three","b":"four"} 0 line3 {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] {"a":"five","b":"six"} 0 PREHOOK: query: SELECT str FROM orc_create_complex @@ -99,7 +99,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### strct mp lst -{"a":"one","b":"two"} {"key13":"value13","key11":"value11","key12":"value12"} ["a","b","c"] +{"a":"one","b":"two"} {"key11":"value11","key12":"value12","key13":"value13"} ["a","b","c"] {"a":"three","b":"four"} {"key21":"value21","key22":"value22","key23":"value23"} ["d","e","f"] {"a":"five","b":"six"} {"key31":"value31","key32":"value32","key33":"value33"} ["g","h","i"] PREHOOK: query: SELECT lst, str FROM orc_create_complex @@ -123,7 +123,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@orc_create_complex #### A masked pattern was here #### mp str -{"key13":"value13","key11":"value11","key12":"value12"} line1 +{"key11":"value11","key12":"value12","key13":"value13"} line1 {"key21":"value21","key22":"value22","key23":"value23"} line2 {"key31":"value31","key32":"value32","key33":"value33"} line3 PREHOOK: query: SELECT strct, str FROM orc_create_complex diff --git ql/src/test/results/clientpositive/vector_complex_join.q.out ql/src/test/results/clientpositive/vector_complex_join.q.out index 513c159..dfc30e4 100644 --- ql/src/test/results/clientpositive/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/vector_complex_join.q.out @@ -63,12 +63,23 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: cint is not null (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -76,21 +87,32 @@ STAGE PLANS: keys: 0 _col2 (type: int) 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
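In vector_complex_all.q.out only the map-key ordering of the golden rows changes ("key13","key11","key12" becomes "key11","key12","key13"); the row contents are identical, consistent with the now-vectorized ORC reader materializing map entries in a different, sorted order. The affected statements are plain projections over complex columns, e.g.:

    select strct, mp, lst from orc_create_complex;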
hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Small Table expression for MAPJOIN operator: Data type map of Column[_col1] not supported - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Local Work: Map Reduce Local Work @@ -214,7 +236,7 @@ STAGE PLANS: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: Predicate expression for FILTER operator: Data type array of Column[a] not supported + notVectorizedReason: Predicate expression for FILTER operator: org.apache.hadoop.hive.ql.metadata.HiveException: Unexpected hive type name array vectorized: false Local Work: Map Reduce Local Work diff --git ql/src/test/results/clientpositive/vector_count.q.out ql/src/test/results/clientpositive/vector_count.q.out index ff6993e..1068c78 100644 --- ql/src/test/results/clientpositive/vector_count.q.out +++ ql/src/test/results/clientpositive/vector_count.q.out @@ -78,9 +78,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFSumLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2] keys: a (type: int), b (type: int), c (type: int) mode: hash @@ -114,8 +116,10 @@ STAGE PLANS: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col2) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial @@ -182,9 +186,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 4:long) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 3) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, 
VectorUDAFCount(col 1) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, col 2, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] keys: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int) mode: hash @@ -217,8 +223,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 @@ -304,8 +312,10 @@ STAGE PLANS: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(DISTINCT KEY._col1:1._col0), sum(VALUE._col0) Group By Vectorization: + groupByMode: COMPLETE vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: complete @@ -393,8 +403,10 @@ STAGE PLANS: Group By Operator aggregations: count(1), count(), count(KEY._col0:0._col0), count(KEY._col0:1._col0), count(KEY._col0:2._col0), count(KEY._col0:3._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0), count(DISTINCT KEY._col0:3._col0), count(DISTINCT KEY._col0:4._col0, KEY._col0:4._col1), count(DISTINCT KEY._col0:5._col0, KEY._col0:5._col1), count(DISTINCT KEY._col0:6._col0, KEY._col0:6._col1), count(DISTINCT KEY._col0:7._col0, KEY._col0:7._col1), count(DISTINCT KEY._col0:8._col0, KEY._col0:8._col1), count(DISTINCT KEY._col0:9._col0, KEY._col0:9._col1), count(DISTINCT KEY._col0:10._col0, KEY._col0:10._col1, KEY._col0:10._col2), count(DISTINCT KEY._col0:11._col0, KEY._col0:11._col1, KEY._col0:11._col2), count(DISTINCT KEY._col0:12._col0, KEY._col0:12._col1, KEY._col0:12._col2), count(DISTINCT KEY._col0:13._col0, KEY._col0:13._col1, KEY._col0:13._col2), count(DISTINCT KEY._col0:14._col0, KEY._col0:14._col1, KEY._col0:14._col2, KEY._col0:14._col3) Group By Vectorization: + groupByMode: COMPLETE vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20 diff --git ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index 34c60c0..04c90a2 100644 --- ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -65,9 +65,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: cint (type: int) mode: hash @@ -101,8 +103,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), max(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), count(VALUE._col8) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial @@ -198,13 +202,14 @@ STAGE PLANS: Group By Operator aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1) -> bigint, VectorUDAFMaxDecimal(col 1) -> decimal(20,10), VectorUDAFMinDecimal(col 1) -> decimal(20,10), VectorUDAFSumDecimal(col 1) -> decimal(38,18), VectorUDAFAvgDecimal(col 1) -> struct, VectorUDAFStdPopDecimal(col 1) -> struct, VectorUDAFStdSampDecimal(col 1) -> struct, VectorUDAFCount(col 2) -> bigint, VectorUDAFMaxDecimal(col 2) -> decimal(23,14), VectorUDAFMinDecimal(col 2) -> decimal(23,14), VectorUDAFSumDecimal(col 2) -> decimal(38,18), VectorUDAFAvgDecimal(col 2) -> struct, VectorUDAFStdPopDecimal(col 2) -> struct, VectorUDAFStdSampDecimal(col 2) -> struct, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 1) -> struct output type STRUCT 
requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 1) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFAvgDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampDecimal(col 2) -> struct output type STRUCT requires PRIMITIVE IS false keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -213,13 +218,18 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -232,8 +242,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_decimal_precision.q.out ql/src/test/results/clientpositive/vector_decimal_precision.q.out index 690441f..0dc5a67 100644 --- ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -578,24 +578,30 @@ STAGE PLANS: Group By Operator aggregations: avg(dec), sum(dec) Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) + aggregators: VectorUDAFAvgDecimal(col 0) -> struct, VectorUDAFSumDecimal(col 0) -> decimal(38,18) className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort 
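The vector_decimal_aggregate.q.out hunks follow the same pattern for decimal inputs: the struct-typed partials of avg/stddev_pop/stddev_samp no longer disqualify vector output, and the map side stays vectorized end to end. A trimmed sketch of the aggregate list visible in the hunk (cint, cdecimal1, cdecimal2 are the hunk's columns; the table name decimal_vgby is an assumption):

    select cint,
           count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1),
           avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1),
           count(*)
    from decimal_vgby
    group by cint;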
order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -608,8 +614,10 @@ STAGE PLANS: Group By Operator aggregations: avg(VALUE._col0), sum(VALUE._col1) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/vector_distinct_2.q.out ql/src/test/results/clientpositive/vector_distinct_2.q.out index b6e9527..db688bf 100644 --- ql/src/test/results/clientpositive/vector_distinct_2.q.out +++ ql/src/test/results/clientpositive/vector_distinct_2.q.out @@ -136,9 +136,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 8 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: t (type: tinyint), s (type: string) mode: hash @@ -170,8 +172,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index b7580f3..a95fdf6 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -41,9 +41,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -75,8 +77,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: partial2 @@ -85,8 +89,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: partial2 outputColumnNames: _col0 @@ -131,8 +137,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 @@ -195,9 +203,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: 
HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -229,8 +239,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: partial2 @@ -239,8 +251,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: partial2 outputColumnNames: _col0 @@ -285,8 +299,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 @@ -357,9 +373,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -391,8 +409,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: partial2 @@ -401,8 +421,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: partial2 outputColumnNames: _col0 @@ -447,8 +469,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 @@ -519,9 +543,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 2 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: cint (type: int) mode: hash @@ -553,8 +579,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: partial2 @@ -563,8 +591,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0) Group By Vectorization: + groupByMode: PARTIAL2 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: partial2 outputColumnNames: _col0 @@ -609,8 +639,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out index 9de8e6e..34b571e 100644 --- ql/src/test/results/clientpositive/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -81,8 +81,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: PARTIAL1 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: partial1 @@ -128,8 +130,10 @@ STAGE PLANS: 
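The four near-identical vector_empty_where.q.out hunks above are the same two-stage distinct-count rewrite (hash group-by on cint, then PARTIAL2 and MERGEPARTIAL count stages), planned once per WHERE-clause variant in the test. The underlying shape, with the table name assumed from this suite:

    select count(distinct cint) from alltypesorc;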
Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: FINAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: final diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out index 25cf5b2..bc86c15 100644 --- ql/src/test/results/clientpositive/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -81,8 +81,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: PARTIAL1 vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: partial1 @@ -128,8 +130,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: FINAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: final diff --git ql/src/test/results/clientpositive/vector_groupby_3.q.out ql/src/test/results/clientpositive/vector_groupby_3.q.out index 9a1256b..d360e44 100644 --- ql/src/test/results/clientpositive/vector_groupby_3.q.out +++ ql/src/test/results/clientpositive/vector_groupby_3.q.out @@ -138,9 +138,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMaxLong(col 3) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 8 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: t (type: tinyint), s (type: string) mode: hash @@ -174,8 +176,10 @@ STAGE PLANS: Group By Operator aggregations: max(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: tinyint), KEY._col1 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out index f0c87c4..17ebb08 100644 --- ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/vector_groupby_mapjoin.q.out @@ -52,8 +52,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFCount(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -84,8 +86,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 @@ -334,9 +338,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: key (type: string) mode: hash @@ -368,8 +374,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 
bc59510..a72cb55 100644 --- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -11,20 +11,20 @@ PREHOOK: query: create table store_sales_txt ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) -row format delimited fields terminated by '|' +row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -42,20 +42,20 @@ POSTHOOK: query: create table store_sales_txt ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) -row format delimited fields terminated by '|' +row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -81,18 +81,19 @@ PREHOOK: query: create table store_sales ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_wholesale_cost_decimal decimal(38,18), + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) stored as orc tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") @@ -112,18 +113,19 @@ POSTHOOK: query: create table store_sales ss_promo_sk int, ss_ticket_number int, ss_quantity int, - ss_wholesale_cost float, - ss_list_price float, - ss_sales_price float, - ss_ext_discount_amt float, - ss_ext_sales_price float, - ss_ext_wholesale_cost float, - ss_ext_list_price float, - ss_ext_tax float, - ss_coupon_amt float, - ss_net_paid float, - ss_net_paid_inc_tax float, - ss_net_profit float + ss_wholesale_cost double, + ss_wholesale_cost_decimal decimal(38,18), + ss_list_price double, + ss_sales_price double, + ss_ext_discount_amt double, + ss_ext_sales_price double, + ss_ext_wholesale_cost double, + ss_ext_list_price double, + ss_ext_tax double, + 
ss_coupon_amt double, + ss_net_paid double, + ss_net_paid_inc_tax double, + ss_net_profit double ) stored as orc tblproperties ("orc.stripe.size"="33554432", "orc.compress.size"="16384") @@ -144,6 +146,7 @@ ss_sold_date_sk , ss_ticket_number , ss_quantity , ss_wholesale_cost , + cast(ss_wholesale_cost as decimal(38,18)), ss_list_price , ss_sales_price , ss_ext_discount_amt , @@ -173,6 +176,7 @@ ss_sold_date_sk , ss_ticket_number , ss_quantity , ss_wholesale_cost , + cast(ss_wholesale_cost as decimal(38,18)), ss_list_price , ss_sales_price , ss_ext_discount_amt , @@ -190,27 +194,28 @@ POSTHOOK: Input: default@store_sales_txt POSTHOOK: Output: default@store_sales POSTHOOK: Lineage: store_sales.ss_addr_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_addr_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_cdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_cdemo_sk, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_coupon_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_coupon_amt, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_coupon_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_coupon_amt, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_customer_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_customer_sk, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_discount_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_discount_amt, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_list_price, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_sales_price, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_tax, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_ext_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_wholesale_cost, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_discount_amt SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_discount_amt, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_list_price, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_sales_price, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_tax, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_ext_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ext_wholesale_cost, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_hdemo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_hdemo_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_item_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_item_sk, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_list_price, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_net_paid SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid, type:float, comment:null), ] -POSTHOOK: Lineage: 
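The store_sales reload above widens every float column to double and adds ss_wholesale_cost_decimal, populated by an explicit cast so that double and decimal(38,18) aggregates over the same values can be compared side by side. The cast in isolation:

    select ss_wholesale_cost,
           cast(ss_wholesale_cost as decimal(38,18)) as ss_wholesale_cost_decimal
    from store_sales_txt
    limit 5;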
store_sales.ss_net_paid_inc_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid_inc_tax, type:float, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_profit, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_list_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_list_price, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_paid_inc_tax SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_paid_inc_tax, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_net_profit SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_net_profit, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_promo_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_promo_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_quantity SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_quantity, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sales_price, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_sales_price SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sales_price, type:double, comment:null), ] POSTHOOK: Lineage: store_sales.ss_sold_date_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_date_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_sold_time_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_sold_time_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_store_sk SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_store_sk, type:int, comment:null), ] POSTHOOK: Lineage: store_sales.ss_ticket_number SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_ticket_number, type:int, comment:null), ] -POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:float, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_wholesale_cost SIMPLE [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:double, comment:null), ] +POSTHOOK: Lineage: store_sales.ss_wholesale_cost_decimal EXPRESSION [(store_sales_txt)store_sales_txt.FieldSchema(name:ss_wholesale_cost, type:double, comment:null), ] PREHOOK: query: explain vectorization expression select ss_ticket_number @@ -244,10 +249,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] Select Operator expressions: ss_ticket_number (type: int) outputColumnNames: ss_ticket_number @@ -255,18 +260,20 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [9] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By 
Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 9 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -276,7 +283,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -294,13 +301,15 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -323,7 +332,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -342,13 +351,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4820 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 4820 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -439,10 +448,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] Select Operator expressions: ss_ticket_number 
(type: int) outputColumnNames: ss_ticket_number @@ -450,18 +459,20 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [9] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 9 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ss_ticket_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -471,7 +482,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -488,27 +499,31 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: min(_col0) Group By Vectorization: + groupByMode: COMPLETE vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -531,7 +546,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true @@ -549,10 +564,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -674,24 +689,26 @@ POSTHOOK: Input: default@store_sales 82 PREHOOK: query: explain vectorization expression select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number @@ -711,42 +728,51 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Select Operator - expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity - Select Vectorization: - className: VectorSelectOperator + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumns: [2, 9, 10] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: min(ss_quantity) - Group By Vectorization: - aggregators: VectorUDAFMinLong(col 10) -> int - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: col 9, col 2 - native: false - projectedOutputColumns: [0] - keys: ss_ticket_number (type: int), ss_item_sk (type: int) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkOperator + predicateExpression: FilterLongColEqualLongScalar(col 9, val 1) -> boolean + predicate: (ss_ticket_number = 1) (type: boolean) + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ss_item_sk (type: int), ss_quantity (type: int), ss_wholesale_cost (type: double), ss_wholesale_cost_decimal (type: decimal(38,18)) + outputColumnNames: ss_item_sk, ss_quantity, ss_wholesale_cost, ss_wholesale_cost_decimal + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumns: [2, 10, 11, 12] + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ss_quantity), max(ss_wholesale_cost), max(ss_wholesale_cost_decimal) + Group By Vectorization: + aggregators: VectorUDAFMinLong(col 10) -> int, VectorUDAFMaxDouble(col 11) -> double, VectorUDAFMaxDecimal(col 12) -> decimal(38,18) + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 2 native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] + keys: ss_item_sk (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: double), _col3 (type: decimal(38,18)) Execution mode: vectorized Map Vectorization: enabled: true @@ -762,29 +788,33 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), max(VALUE._col2) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null - keys: KEY._col0 (type: int), KEY._col1 (type: int) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(38,18)) + outputColumnNames: _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), sum(_col2) + aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null - keys: _col1 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column 
stats: NONE + keys: 1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -798,17 +828,18 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct), _col4 (type: double), _col5 (type: struct), _col6 (type: decimal(38,18)), _col7 (type: struct) Execution mode: vectorized Map Vectorization: enabled: true @@ -823,17 +854,29 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), sum(VALUE._col5), avg(VALUE._col6) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -842,12 +885,13 @@ STAGE PLANS: ListSink PREHOOK: query: select - ss_ticket_number, sum(ss_item_sk), sum(q) + 
ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number @@ -855,106 +899,26 @@ PREHOOK: type: QUERY PREHOOK: Input: default@store_sales #### A masked pattern was here #### POSTHOOK: query: select - ss_ticket_number, sum(ss_item_sk), sum(q) + ss_ticket_number, sum(ss_item_sk), sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales + where ss_ticket_number = 1 group by ss_ticket_number, ss_item_sk) a group by ss_ticket_number order by ss_ticket_number POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales #### A masked pattern was here #### -1 85411 816 -2 157365 812 -3 147948 710 -4 69545 411 -5 163232 840 -6 86307 627 -7 114874 563 -8 117953 662 -9 173250 690 -10 60338 602 -11 138545 657 -12 97181 586 -13 109484 555 -14 137333 442 -15 176829 652 -16 115004 654 -17 105008 460 -18 165135 738 -19 128252 831 -20 104789 374 -21 72771 469 -22 128153 449 -23 110253 603 -24 100662 1029 -25 118714 760 -26 81596 502 -27 164068 871 -28 58632 409 -29 133777 417 -30 130451 772 -31 114967 586 -32 142021 592 -33 151818 691 -34 112559 662 -35 137027 780 -36 118285 538 -37 94528 401 -38 81368 521 -39 101064 937 -40 84435 480 -41 112444 688 -42 95731 840 -43 57298 410 -44 159880 839 -45 68919 474 -46 111212 374 -47 78210 416 -48 94459 445 -49 90879 589 -50 37821 407 -51 124927 612 -52 98099 489 -53 138706 609 -54 87478 354 -55 90290 406 -56 78812 372 -57 101175 597 -58 88044 202 -59 104582 753 -60 99218 900 -61 66514 392 -62 126713 527 -63 98778 648 -64 131659 380 -65 86990 494 -66 108808 492 -67 75250 711 -68 91671 548 -69 92821 405 -70 75021 319 -71 124484 748 -72 161470 744 -73 104358 621 -74 88609 688 -75 92940 649 -76 75853 580 -77 124755 873 -78 98285 573 -79 160595 581 -80 151471 704 -81 105109 429 -82 55611 254 +1 85411 816 58.285714285714285 621.35 44.38214285714286 621.350000000000000000 44.382142857142857143 PREHOOK: query: explain vectorization expression select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -963,10 +927,10 @@ order by ss_ticket_number, ss_item_sk PREHOOK: type: QUERY POSTHOOK: query: explain vectorization expression select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -988,31 +952,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: store_sales - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 241204 Basic stats: 
COMPLETE Column stats: NONE TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23] Select Operator - expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int) - outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity + expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int), ss_wholesale_cost (type: double), ss_wholesale_cost_decimal (type: decimal(38,18)) + outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity, ss_wholesale_cost, ss_wholesale_cost_decimal Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumns: [2, 9, 10] - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumns: [2, 9, 10, 11, 12] + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ss_quantity) + aggregations: min(ss_quantity), max(ss_wholesale_cost), max(ss_wholesale_cost_decimal) Group By Vectorization: - aggregators: VectorUDAFMinLong(col 10) -> int + aggregators: VectorUDAFMinLong(col 10) -> int, VectorUDAFMaxDouble(col 11) -> double, VectorUDAFMaxDecimal(col 12) -> decimal(38,18) className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 9, col 2 native: false - projectedOutputColumns: [0] + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] keys: ss_ticket_number (type: int), ss_item_sk (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1022,8 +988,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int) + Statistics: Num rows: 1000 Data size: 241204 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: decimal(38,18)) Execution mode: vectorized Map Vectorization: enabled: true @@ -1039,35 +1005,43 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0) + aggregations: min(VALUE._col0), max(VALUE._col1), max(VALUE._col2) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: int), 
_col2 (type: int) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE + expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2) + aggregations: sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) Group By Vectorization: + groupByMode: COMPLETE vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: _col1 (type: int), _col0 (type: int) mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1075,7 +1049,7 @@ STAGE PLANS: TableScan TableScan Vectorization: native: true - projectedOutputColumns: [0, 1, 2] + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7] Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1084,8 +1058,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) Execution mode: vectorized Map Vectorization: enabled: true @@ -1101,12 +1075,12 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), 
VALUE._col4 (type: decimal(38,18)), VALUE._col5 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1119,10 +1093,10 @@ STAGE PLANS: ListSink PREHOOK: query: select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -1132,10 +1106,10 @@ PREHOOK: type: QUERY PREHOOK: Input: default@store_sales #### A masked pattern was here #### POSTHOOK: query: select - ss_ticket_number, ss_item_sk, sum(q) + ss_ticket_number, ss_item_sk, sum(q), avg(q), sum(wc), avg(wc), sum(decwc), avg(decwc) from (select - ss_ticket_number, ss_item_sk, min(ss_quantity) q + ss_ticket_number, ss_item_sk, min(ss_quantity) q, max(ss_wholesale_cost) wc, max(ss_wholesale_cost_decimal) decwc from store_sales group by ss_ticket_number, ss_item_sk) a @@ -1144,1003 +1118,1003 @@ order by ss_ticket_number, ss_item_sk POSTHOOK: type: QUERY POSTHOOK: Input: default@store_sales #### A masked pattern was here #### -1 49 5 -1 173 65 -1 1553 50 -1 3248 58 -1 3617 79 -1 4553 100 -1 4583 72 -1 4682 44 -1 5527 88 -1 5981 14 -1 10993 91 -1 13283 37 -1 13538 14 -1 13631 99 -2 1363 4 -2 2930 36 -2 3740 49 -2 6928 65 -2 7654 25 -2 9436 79 -2 10768 30 -2 12068 74 -2 12223 78 -2 13340 71 -2 13927 93 -2 14701 58 -2 15085 88 -2 15782 62 -2 17420 NULL -3 246 96 -3 1531 NULL -3 3525 42 -3 4698 98 -3 5355 53 -3 10693 27 -3 12447 82 -3 13021 64 -3 14100 79 -3 14443 4 -3 15786 56 -3 16869 4 -3 17263 17 -3 17971 88 -4 163 17 -4 1576 74 -4 5350 86 -4 5515 23 -4 6988 23 -4 7990 56 -4 8452 27 -4 9685 21 -4 11036 41 -4 12790 43 -5 1808 NULL -5 1940 60 -5 5842 50 -5 6068 76 -5 6466 36 -5 11324 52 -5 11590 15 -5 12650 66 -5 13562 64 -5 13958 60 -5 14599 83 -5 14686 91 -5 15752 66 -5 16195 50 -5 16792 71 -6 2549 62 -6 2647 100 -6 3049 31 -6 3291 100 -6 6437 72 -6 8621 NULL -6 10355 94 -6 10895 1 -6 11705 61 -6 13245 64 -6 13513 42 -7 4627 9 -7 4795 73 -7 4833 88 -7 5183 51 -7 5905 69 -7 8955 54 -7 9751 4 -7 10487 52 -7 12571 82 -7 15179 12 -7 15333 NULL -7 17255 69 -8 665 31 -8 4183 90 -8 5929 83 -8 7115 54 -8 11365 7 -8 11893 95 -8 12041 95 -8 13427 87 -8 16671 20 -8 17119 51 -8 17545 49 -9 69 11 -9 889 6 -9 1185 62 -9 4623 34 -9 7945 83 -9 8334 71 -9 12027 27 -9 12969 59 -9 13483 NULL -9 13717 53 -9 15133 15 -9 16083 32 -9 16363 54 -9 16461 66 -9 16659 84 -9 17310 33 -10 755 74 -10 1425 92 -10 1511 76 -10 3433 83 -10 3933 52 -10 4357 17 -10 5863 47 -10 9811 28 -10 13803 66 -10 15447 67 -11 157 84 -11 1315 70 -11 7519 68 -11 7608 66 -11 9901 57 -11 10699 33 -11 11490 NULL -11 11991 38 -11 12438 16 -11 15157 96 -11 15649 33 -11 17226 11 -11 17395 85 -12 373 57 -12 1591 82 -12 4888 56 -12 6148 36 -12 6248 36 -12 9616 66 -12 9788 73 -12 13399 46 -12 14746 26 -12 14944 9 -12 15440 99 -13 868 NULL -13 1760 12 -13 1898 NULL -13 2108 9 -13 2191 NULL -13 4430 73 -13 5971 80 
-13 6085 58 -13 6140 15 -13 6682 80 -13 7640 48 -13 7723 27 -13 10096 12 -13 11758 34 -13 16894 87 -13 17240 20 -14 177 41 -14 769 20 -14 4507 4 -14 10175 19 -14 11549 6 -14 11653 60 -14 11817 81 -14 12587 NULL -14 13069 77 -14 13515 57 -14 13845 17 -14 16741 46 -14 16929 14 -15 4241 21 -15 4505 59 -15 4777 28 -15 7391 98 -15 8336 15 -15 8353 NULL -15 8690 32 -15 8707 21 -15 10361 39 -15 11659 80 -15 13172 25 -15 16619 81 -15 17267 7 -15 17330 82 -15 17564 26 -15 17857 38 -16 457 60 -16 1888 4 -16 4144 94 -16 6008 59 -16 7504 51 -16 8887 35 -16 9769 42 -16 9790 17 -16 9997 94 -16 11168 86 -16 11920 29 -16 16226 13 -16 17246 70 -17 2092 37 -17 4678 34 -17 6811 70 -17 9214 57 -17 10543 54 -17 11203 21 -17 13177 45 -17 13826 32 -17 15781 76 -17 17683 34 -18 2440 40 -18 5251 41 -18 7378 94 -18 8779 9 -18 8884 18 -18 9886 62 -18 11584 76 -18 11890 7 -18 12602 81 -18 12826 93 -18 12860 18 -18 14011 95 -18 14372 76 -18 14377 15 -18 17995 13 -19 1094 48 -19 3133 96 -19 3376 84 -19 4882 84 -19 6772 97 -19 7087 1 -19 7814 29 -19 8662 97 -19 9094 49 -19 9346 39 -19 10558 82 -19 10651 46 -19 11914 59 -19 16330 NULL -19 17539 20 -20 1451 89 -20 2618 4 -20 5312 9 -20 5425 15 -20 5483 8 -20 6026 21 -20 7207 90 -20 8714 NULL -20 9086 4 -20 9800 32 -20 13601 17 -20 14935 NULL -20 15131 85 -21 230 48 -21 1810 59 -21 2870 50 -21 5170 45 -21 5998 51 -21 6476 49 -21 9187 14 -21 12266 47 -21 14368 18 -21 14396 88 -22 9985 70 -22 10474 31 -22 11599 66 -22 12415 10 -22 15310 15 -22 16396 85 -22 16922 88 -22 17392 14 -22 17660 70 -23 319 86 -23 7242 37 -23 8181 13 -23 8413 1 -23 9093 38 -23 9097 81 -23 11220 91 -23 11257 64 -23 12397 80 -23 15403 96 -23 17631 16 -24 407 53 -24 1389 72 -24 1795 21 -24 2497 85 -24 3103 73 -24 4425 57 -24 4749 28 -24 4873 41 -24 5653 92 -24 6043 1 -24 6751 82 -24 7375 97 -24 10265 93 -24 11551 48 -24 13303 97 -24 16483 89 -25 1333 55 -25 2150 100 -25 2608 76 -25 3454 100 -25 4880 29 -25 5954 34 -25 6955 40 -25 7874 65 -25 9472 48 -25 10159 24 -25 14488 26 -25 14635 68 -25 17000 40 -25 17752 55 -26 1989 26 -26 5053 4 -26 5385 97 -26 5721 81 -26 6647 64 -26 7337 45 -26 9679 18 -26 11895 77 -26 12851 56 -26 15039 34 -27 1305 44 -27 2137 96 -27 2671 92 -27 5831 61 -27 7139 59 -27 8167 28 -27 10757 15 -27 11441 15 -27 11509 65 -27 12237 89 -27 12749 31 -27 13885 66 -27 15025 26 -27 16029 59 -27 16419 65 -27 16767 60 -28 1807 98 -28 2817 8 -28 2967 29 -28 4483 78 -28 5437 15 -28 6411 3 -28 7965 93 -28 8043 58 -28 8407 14 -28 10295 13 -29 20 18 -29 1363 75 -29 2930 23 -29 3740 5 -29 7654 20 -29 9458 33 -29 10795 33 -29 12068 37 -29 12223 59 -29 13340 21 -29 13693 NULL -29 15085 40 -29 15626 NULL -29 15782 53 -30 217 91 -30 1951 59 -30 3238 16 -30 3506 15 -30 3928 87 -30 5431 77 -30 6752 69 -30 7870 7 -30 8666 21 -30 12572 33 -30 12670 20 -30 13579 75 -30 14848 62 -30 17348 62 -30 17875 78 -31 913 54 -31 4963 67 -31 6617 11 -31 6917 4 -31 7513 82 -31 11739 95 -31 14575 97 -31 14727 41 -31 15341 31 -31 15411 53 -31 16251 51 -32 1115 61 -32 2095 34 -32 2887 8 -32 4339 6 -32 4537 22 -32 4808 NULL -32 5798 87 -32 7547 24 -32 9683 26 -32 11005 46 -32 11348 41 -32 12134 21 -32 15001 57 -32 15644 34 -32 16421 74 -32 17659 51 -33 4798 27 -33 7300 3 -33 9649 36 -33 10376 21 -33 11119 92 -33 11756 26 -33 12643 89 -33 12760 54 -33 12964 80 -33 14125 66 -33 14158 82 -33 14692 93 -33 15478 22 -34 1526 91 -34 1717 53 -34 2312 6 -34 4118 88 -34 5197 63 -34 5449 9 -34 6193 61 -34 9325 3 -34 9766 83 -34 12016 42 -34 12290 53 -34 12512 60 -34 13814 20 -34 16324 30 -35 411 51 -35 2377 52 -35 3667 97 -35 4325 
56 -35 5179 83 -35 11635 87 -35 11661 81 -35 14239 55 -35 15619 45 -35 15757 9 -35 17341 92 -35 17365 65 -35 17451 7 -36 1115 80 -36 2095 43 -36 2887 31 -36 7547 46 -36 11005 49 -36 11349 80 -36 15001 54 -36 15645 23 -36 16421 25 -36 17561 16 -36 17659 91 -37 2997 94 -37 7283 87 -37 10715 52 -37 10929 88 -37 13171 6 -37 15337 62 -37 16971 12 -37 17125 NULL -38 757 2 -38 2164 17 -38 3439 84 -38 4154 35 -38 5113 73 -38 6220 98 -38 7018 15 -38 7784 56 -38 8870 15 -38 9710 7 -38 10441 62 -38 15698 57 -39 386 89 -39 1598 64 -39 3476 73 -39 3943 64 -39 4190 86 -39 4957 24 -39 5393 98 -39 7097 78 -39 7118 67 -39 7604 49 -39 7697 24 -39 8078 54 -39 8411 96 -39 15491 54 -39 15625 17 -40 2854 71 -40 3490 65 -40 3985 63 -40 5098 35 -40 5318 87 -40 10094 80 -40 10912 23 -40 12050 NULL -40 13658 53 -40 16976 3 -41 10 50 -41 64 29 -41 3380 88 -41 5566 11 -41 6310 90 -41 7402 69 -41 7603 94 -41 9322 8 -41 10915 81 -41 14788 15 -41 15242 87 -41 15328 46 -41 16514 20 -42 619 69 -42 976 100 -42 1436 94 -42 2314 74 -42 2392 14 -42 2602 30 -42 3346 74 -42 3613 30 -42 6058 30 -42 6134 92 -42 8462 23 -42 9740 52 -42 10016 57 -42 10471 19 -42 12550 41 -42 15002 41 -43 2923 16 -43 3344 22 -43 3911 26 -43 4364 77 -43 4691 41 -43 5773 85 -43 5852 16 -43 11771 30 -43 14669 97 -44 2351 56 -44 2623 18 -44 7303 14 -44 7527 67 -44 9059 68 -44 11707 83 -44 12341 20 -44 13331 98 -44 13449 45 -44 14149 80 -44 15803 81 -44 16491 56 -44 16837 92 -44 16909 61 -45 811 62 -45 1479 49 -45 3265 98 -45 5309 18 -45 7363 87 -45 10115 68 -45 11095 40 -45 13133 46 -45 16349 6 -46 1960 12 -46 3010 67 -46 7040 33 -46 8065 NULL -46 11426 72 -46 13042 58 -46 15595 32 -46 16540 30 -46 17150 57 -46 17384 13 -47 254 NULL -47 481 30 -47 1132 66 -47 1916 71 -47 3085 51 -47 3202 7 -47 3878 NULL -47 4774 11 -47 5008 82 -47 5305 NULL -47 5468 7 -47 7214 1 -47 9770 33 -47 13246 47 -47 13477 10 -48 1761 22 -48 2820 4 -48 2829 65 -48 4431 39 -48 5971 29 -48 6085 1 -48 6684 44 -48 9199 88 -48 11259 NULL -48 12468 62 -48 13153 74 -48 17799 17 -49 749 60 -49 2135 4 -49 5342 69 -49 5852 47 -49 6805 40 -49 7141 94 -49 9049 68 -49 9553 71 -49 12737 48 -49 15155 84 -49 16361 4 -50 1280 69 -50 1312 30 -50 1909 53 -50 1984 40 -50 3097 64 -50 5023 NULL -50 7135 69 -50 16081 82 -51 422 21 -51 3091 28 -51 4687 6 -51 5029 12 -51 5059 51 -51 6565 33 -51 8384 79 -51 9311 90 -51 10133 54 -51 11234 NULL -51 12625 53 -51 13199 97 -51 17483 22 -51 17705 66 -52 2420 90 -52 3334 73 -52 6098 NULL -52 7606 45 -52 11488 76 -52 15649 29 -52 16646 48 -52 17402 91 -52 17456 37 -53 1114 40 -53 2095 62 -53 2786 70 -53 2887 39 -53 7546 58 -53 11348 38 -53 13220 76 -53 13795 38 -53 15991 37 -53 16420 14 -53 16648 79 -53 17296 43 -53 17560 15 -54 702 40 -54 825 50 -54 1165 62 -54 3861 NULL -54 6517 40 -54 9159 75 -54 14737 38 -54 16059 15 -54 16974 NULL -54 17479 34 -55 1339 16 -55 3001 7 -55 5137 33 -55 9703 44 -55 12170 92 -55 12205 90 -55 14135 36 -55 14923 71 -55 17677 17 -56 4242 2 -56 4506 57 -56 8353 35 -56 8691 59 -56 8707 68 -56 10362 54 -56 16620 23 -56 17331 74 -57 3253 71 -57 4028 88 -57 4933 22 -57 12596 91 -57 12721 62 -57 12740 52 -57 15182 86 -57 17729 26 -57 17993 99 -58 1829 52 -58 3848 6 -58 5117 2 -58 7649 19 -58 9743 62 -58 10802 14 -58 15635 6 -58 16472 6 -58 16949 35 -59 3133 92 -59 3546 22 -59 5772 70 -59 7087 80 -59 8010 46 -59 8335 36 -59 9348 62 -59 9397 92 -59 10651 100 -59 11916 19 -59 12858 90 -59 14529 44 -60 97 50 -60 555 62 -60 633 71 -60 999 43 -60 1117 78 -60 1573 90 -60 4041 25 -60 4235 28 -60 4513 72 -60 4937 22 -60 7231 95 -60 10277 62 -60 
10393 75 -60 13975 14 -60 16887 25 -60 17755 88 -61 1106 4 -61 2264 36 -61 3362 48 -61 4567 26 -61 5528 78 -61 6380 77 -61 7591 78 -61 8924 11 -61 10330 8 -61 16462 26 -62 4093 94 -62 6403 NULL -62 8457 37 -62 10149 75 -62 12163 29 -62 12199 5 -62 12407 NULL -62 13559 80 -62 15399 74 -62 15733 40 -62 16151 93 -63 4488 73 -63 5079 79 -63 5217 66 -63 5658 99 -63 9319 80 -63 11370 38 -63 11946 85 -63 13339 19 -63 15793 40 -63 16569 69 -64 1213 NULL -64 3090 87 -64 3963 NULL -64 11835 82 -64 13224 NULL -64 14407 8 -64 15867 59 -64 15936 30 -64 16921 19 -64 17586 78 -64 17617 17 -65 2287 100 -65 4227 42 -65 9625 51 -65 9847 54 -65 13897 40 -65 14905 85 -65 15177 55 -65 17025 67 -66 6507 76 -66 7033 65 -66 7227 66 -66 8197 41 -66 9237 29 -66 10019 10 -66 11419 66 -66 15629 20 -66 16745 91 -66 16795 28 -67 757 77 -67 2133 74 -67 3439 73 -67 4155 87 -67 5113 NULL -67 7020 79 -67 7507 77 -67 8469 59 -67 8871 71 -67 12087 70 -67 15699 44 -68 1387 74 -68 1603 57 -68 1820 54 -68 2035 22 -68 2296 52 -68 2564 83 -68 5162 23 -68 6763 77 -68 7765 NULL -68 12526 3 -68 12724 88 -68 17426 2 -68 17600 13 -69 322 45 -69 337 34 -69 4208 9 -69 4267 10 -69 6136 7 -69 7264 67 -69 7822 30 -69 8599 53 -69 11137 68 -69 13489 66 -69 13792 NULL -69 15448 16 -70 1592 53 -70 2462 NULL -70 3296 48 -70 3947 NULL -70 6185 82 -70 6425 NULL -70 8893 17 -70 9857 20 -70 14549 4 -70 17815 95 -71 457 75 -71 1888 4 -71 2098 51 -71 4144 49 -71 5858 NULL -71 6008 54 -71 7504 3 -71 8887 10 -71 9274 36 -71 9769 79 -71 9790 96 -71 9997 26 -71 10108 66 -71 10288 30 -71 11168 79 -71 17246 90 -72 1535 9 -72 5917 85 -72 6113 45 -72 6671 13 -72 9860 26 -72 10427 66 -72 10753 16 -72 11741 62 -72 12788 29 -72 12901 57 -72 13085 94 -72 13423 62 -72 13904 37 -72 15587 87 -72 16765 56 -73 247 53 -73 1063 37 -73 3205 82 -73 4946 54 -73 6862 58 -73 10051 49 -73 12502 75 -73 15109 38 -73 16519 97 -73 16585 38 -73 17269 40 -74 326 29 -74 3104 78 -74 3175 23 -74 3278 NULL -74 3542 96 -74 3754 26 -74 5492 54 -74 7694 17 -74 8653 12 -74 9620 95 -74 10069 99 -74 13208 87 -74 16694 72 -75 607 20 -75 2948 25 -75 4625 73 -75 6938 89 -75 6953 71 -75 8726 6 -75 9905 54 -75 10217 85 -75 11039 70 -75 14186 63 -75 16796 93 -76 257 5 -76 465 2 -76 1107 16 -76 1503 97 -76 2265 98 -76 2869 32 -76 3363 25 -76 4237 48 -76 4567 40 -76 5529 78 -76 6381 50 -76 7591 27 -76 8925 6 -76 10331 3 -76 16463 53 -77 992 62 -77 1399 34 -77 2713 85 -77 3868 89 -77 6289 30 -77 7339 88 -77 7448 95 -77 7486 49 -77 8686 38 -77 9220 90 -77 11918 36 -77 12439 95 -77 13456 48 -77 14815 18 -77 16687 16 -78 901 3 -78 3304 50 -78 3856 27 -78 5965 78 -78 6044 59 -78 6110 43 -78 6500 76 -78 7576 87 -78 8611 79 -78 10507 6 -78 11209 7 -78 12706 19 -78 14996 39 -79 247 NULL -79 1063 85 -79 3205 48 -79 4947 35 -79 6864 1 -79 10051 10 -79 10524 36 -79 12504 81 -79 14322 41 -79 15109 NULL -79 15498 3 -79 15888 58 -79 16519 9 -79 16585 93 -79 17269 81 -80 998 93 -80 1519 25 -80 1573 40 -80 4040 66 -80 4513 NULL -80 4622 1 -80 7231 49 -80 7610 37 -80 10393 5 -80 12968 NULL -80 13717 91 -80 13975 13 -80 16363 84 -80 16886 77 -80 17308 29 -80 17755 94 -81 4486 31 -81 5078 75 -81 5216 64 -81 5656 24 -81 7166 7 -81 7663 79 -81 8918 37 -81 9319 36 -81 11107 36 -81 11368 26 -81 13339 6 -81 15793 8 -82 2572 53 -82 7862 75 -82 13138 59 -82 14998 49 -82 17041 18 +1 49 5 5.0 10.68 10.68 10.680000000000000000 10.680000000000000000 +1 173 65 65.0 27.16 27.16 27.160000000000000000 27.160000000000000000 +1 1553 50 50.0 67.71 67.71 67.710000000000000000 67.710000000000000000 +1 3248 58 58.0 4.57 4.57 
4.570000000000000000 4.570000000000000000 +1 3617 79 79.0 11.41 11.41 11.410000000000000000 11.410000000000000000 +1 4553 100 100.0 25.08 25.08 25.080000000000000000 25.080000000000000000 +1 4583 72 72.0 84.72 84.72 84.720000000000000000 84.720000000000000000 +1 4682 44 44.0 31.07 31.07 31.070000000000000000 31.070000000000000000 +1 5527 88 88.0 52.41 52.41 52.410000000000000000 52.410000000000000000 +1 5981 14 14.0 57.37 57.37 57.370000000000000000 57.370000000000000000 +1 10993 91 91.0 93.48 93.48 93.480000000000000000 93.480000000000000000 +1 13283 37 37.0 63.63 63.63 63.630000000000000000 63.630000000000000000 +1 13538 14 14.0 11.54 11.54 11.540000000000000000 11.540000000000000000 +1 13631 99 99.0 80.52 80.52 80.520000000000000000 80.520000000000000000 +2 1363 4 4.0 13.46 13.46 13.460000000000000000 13.460000000000000000 +2 2930 36 36.0 61.23 61.23 61.230000000000000000 61.230000000000000000 +2 3740 49 49.0 6.55 6.55 6.550000000000000000 6.550000000000000000 +2 6928 65 65.0 93.86 93.86 93.860000000000000000 93.860000000000000000 +2 7654 25 25.0 74.26 74.26 74.260000000000000000 74.260000000000000000 +2 9436 79 79.0 88.02 88.02 88.020000000000000000 88.020000000000000000 +2 10768 30 30.0 2.27 2.27 2.270000000000000000 2.270000000000000000 +2 12068 74 74.0 16.55 16.55 16.550000000000000000 16.550000000000000000 +2 12223 78 78.0 65.71 65.71 65.710000000000000000 65.710000000000000000 +2 13340 71 71.0 36.01 36.01 36.010000000000000000 36.010000000000000000 +2 13927 93 93.0 35.87 35.87 35.870000000000000000 35.870000000000000000 +2 14701 58 58.0 53.09 53.09 53.090000000000000000 53.090000000000000000 +2 15085 88 88.0 64.43 64.43 64.430000000000000000 64.430000000000000000 +2 15782 62 62.0 77.97 77.97 77.970000000000000000 77.970000000000000000 +2 17420 NULL NULL 17.12 17.12 17.120000000000000000 17.120000000000000000 +3 246 96 96.0 98.02 98.02 98.020000000000000000 98.020000000000000000 +3 1531 NULL NULL NULL NULL NULL NULL +3 3525 42 42.0 97.03 97.03 97.030000000000000000 97.030000000000000000 +3 4698 98 98.0 85.0 85.0 85.000000000000000000 85.000000000000000000 +3 5355 53 53.0 23.04 23.04 23.040000000000000000 23.040000000000000000 +3 10693 27 27.0 37.04 37.04 37.040000000000000000 37.040000000000000000 +3 12447 82 82.0 56.14 56.14 56.140000000000000000 56.140000000000000000 +3 13021 64 64.0 74.69 74.69 74.690000000000000000 74.690000000000000000 +3 14100 79 79.0 44.66 44.66 44.660000000000000000 44.660000000000000000 +3 14443 4 4.0 95.75 95.75 95.750000000000000000 95.750000000000000000 +3 15786 56 56.0 4.31 4.31 4.310000000000000000 4.310000000000000000 +3 16869 4 4.0 75.67 75.67 75.670000000000000000 75.670000000000000000 +3 17263 17 17.0 72.38 72.38 72.380000000000000000 72.380000000000000000 +3 17971 88 88.0 27.95 27.95 27.950000000000000000 27.950000000000000000 +4 163 17 17.0 54.26 54.26 54.260000000000000000 54.260000000000000000 +4 1576 74 74.0 81.81 81.81 81.810000000000000000 81.810000000000000000 +4 5350 86 86.0 64.67 64.67 64.670000000000000000 64.670000000000000000 +4 5515 23 23.0 2.91 2.91 2.910000000000000000 2.910000000000000000 +4 6988 23 23.0 53.28 53.28 53.280000000000000000 53.280000000000000000 +4 7990 56 56.0 64.68 64.68 64.680000000000000000 64.680000000000000000 +4 8452 27 27.0 26.21 26.21 26.210000000000000000 26.210000000000000000 +4 9685 21 21.0 40.39 40.39 40.390000000000000000 40.390000000000000000 +4 11036 41 41.0 67.18 67.18 67.180000000000000000 67.180000000000000000 +4 12790 43 43.0 54.34 54.34 54.340000000000000000 54.340000000000000000 +5 1808 NULL 
NULL NULL NULL NULL NULL +5 1940 60 60.0 69.54 69.54 69.540000000000000000 69.540000000000000000 +5 5842 50 50.0 30.69 30.69 30.690000000000000000 30.690000000000000000 +5 6068 76 76.0 89.78 89.78 89.780000000000000000 89.780000000000000000 +5 6466 36 36.0 7.93 7.93 7.930000000000000000 7.930000000000000000 +5 11324 52 52.0 16.33 16.33 16.330000000000000000 16.330000000000000000 +5 11590 15 15.0 21.21 21.21 21.210000000000000000 21.210000000000000000 +5 12650 66 66.0 21.01 21.01 21.010000000000000000 21.010000000000000000 +5 13562 64 64.0 87.9 87.9 87.900000000000000000 87.900000000000000000 +5 13958 60 60.0 41.72 41.72 41.720000000000000000 41.720000000000000000 +5 14599 83 83.0 74.15 74.15 74.150000000000000000 74.150000000000000000 +5 14686 91 91.0 27.68 27.68 27.680000000000000000 27.680000000000000000 +5 15752 66 66.0 71.06 71.06 71.060000000000000000 71.060000000000000000 +5 16195 50 50.0 30.96 30.96 30.960000000000000000 30.960000000000000000 +5 16792 71 71.0 22.1 22.1 22.100000000000000000 22.100000000000000000 +6 2549 62 62.0 85.07 85.07 85.070000000000000000 85.070000000000000000 +6 2647 100 100.0 4.45 4.45 4.450000000000000000 4.450000000000000000 +6 3049 31 31.0 49.78 49.78 49.780000000000000000 49.780000000000000000 +6 3291 100 100.0 41.08 41.08 41.080000000000000000 41.080000000000000000 +6 6437 72 72.0 55.49 55.49 55.490000000000000000 55.490000000000000000 +6 8621 NULL NULL NULL NULL NULL NULL +6 10355 94 94.0 62.67 62.67 62.670000000000000000 62.670000000000000000 +6 10895 1 1.0 71.1 71.1 71.100000000000000000 71.100000000000000000 +6 11705 61 61.0 48.18 48.18 48.180000000000000000 48.180000000000000000 +6 13245 64 64.0 86.35 86.35 86.350000000000000000 86.350000000000000000 +6 13513 42 42.0 64.46 64.46 64.460000000000000000 64.460000000000000000 +7 4627 9 9.0 56.13 56.13 56.130000000000000000 56.130000000000000000 +7 4795 73 73.0 12.17 12.17 12.170000000000000000 12.170000000000000000 +7 4833 88 88.0 38.23 38.23 38.230000000000000000 38.230000000000000000 +7 5183 51 51.0 84.65 84.65 84.650000000000000000 84.650000000000000000 +7 5905 69 69.0 99.85 99.85 99.850000000000000000 99.850000000000000000 +7 8955 54 54.0 42.82 42.82 42.820000000000000000 42.820000000000000000 +7 9751 4 4.0 NULL NULL NULL NULL +7 10487 52 52.0 63.8 63.8 63.800000000000000000 63.800000000000000000 +7 12571 82 82.0 69.53 69.53 69.530000000000000000 69.530000000000000000 +7 15179 12 12.0 47.6 47.6 47.600000000000000000 47.600000000000000000 +7 15333 NULL NULL NULL NULL NULL NULL +7 17255 69 69.0 34.19 34.19 34.190000000000000000 34.190000000000000000 +8 665 31 31.0 15.64 15.64 15.640000000000000000 15.640000000000000000 +8 4183 90 90.0 81.63 81.63 81.630000000000000000 81.630000000000000000 +8 5929 83 83.0 14.11 14.11 14.110000000000000000 14.110000000000000000 +8 7115 54 54.0 36.99 36.99 36.990000000000000000 36.990000000000000000 +8 11365 7 7.0 18.65 18.65 18.650000000000000000 18.650000000000000000 +8 11893 95 95.0 21.29 21.29 21.290000000000000000 21.290000000000000000 +8 12041 95 95.0 91.8 91.8 91.800000000000000000 91.800000000000000000 +8 13427 87 87.0 31.78 31.78 31.780000000000000000 31.780000000000000000 +8 16671 20 20.0 18.95 18.95 18.950000000000000000 18.950000000000000000 +8 17119 51 51.0 8.04 8.04 8.040000000000000000 8.040000000000000000 +8 17545 49 49.0 72.15 72.15 72.150000000000000000 72.150000000000000000 +9 69 11 11.0 31.7 31.7 31.700000000000000000 31.700000000000000000 +9 889 6 6.0 27.17 27.17 27.170000000000000000 27.170000000000000000 +9 1185 62 62.0 55.68 55.68 
55.680000000000000000 55.680000000000000000 +9 4623 34 34.0 2.97 2.97 2.970000000000000000 2.970000000000000000 +9 7945 83 83.0 8.1 8.1 8.100000000000000000 8.100000000000000000 +9 8334 71 71.0 34.79 34.79 34.790000000000000000 34.790000000000000000 +9 12027 27 27.0 98.68 98.68 98.680000000000000000 98.680000000000000000 +9 12969 59 59.0 88.31 88.31 88.310000000000000000 88.310000000000000000 +9 13483 NULL NULL 59.14 59.14 59.140000000000000000 59.140000000000000000 +9 13717 53 53.0 75.37 75.37 75.370000000000000000 75.370000000000000000 +9 15133 15 15.0 35.89 35.89 35.890000000000000000 35.890000000000000000 +9 16083 32 32.0 99.1 99.1 99.100000000000000000 99.100000000000000000 +9 16363 54 54.0 NULL NULL NULL NULL +9 16461 66 66.0 15.21 15.21 15.210000000000000000 15.210000000000000000 +9 16659 84 84.0 76.71 76.71 76.710000000000000000 76.710000000000000000 +9 17310 33 33.0 27.13 27.13 27.130000000000000000 27.130000000000000000 +10 755 74 74.0 82.24 82.24 82.240000000000000000 82.240000000000000000 +10 1425 92 92.0 NULL NULL NULL NULL +10 1511 76 76.0 31.47 31.47 31.470000000000000000 31.470000000000000000 +10 3433 83 83.0 10.26 10.26 10.260000000000000000 10.260000000000000000 +10 3933 52 52.0 52.19 52.19 52.190000000000000000 52.190000000000000000 +10 4357 17 17.0 88.36 88.36 88.360000000000000000 88.360000000000000000 +10 5863 47 47.0 11.71 11.71 11.710000000000000000 11.710000000000000000 +10 9811 28 28.0 47.85 47.85 47.850000000000000000 47.850000000000000000 +10 13803 66 66.0 82.35 82.35 82.350000000000000000 82.350000000000000000 +10 15447 67 67.0 33.28 33.28 33.280000000000000000 33.280000000000000000 +11 157 84 84.0 64.63 64.63 64.630000000000000000 64.630000000000000000 +11 1315 70 70.0 45.84 45.84 45.840000000000000000 45.840000000000000000 +11 7519 68 68.0 7.16 7.16 7.160000000000000000 7.160000000000000000 +11 7608 66 66.0 8.34 8.34 8.340000000000000000 8.340000000000000000 +11 9901 57 57.0 46.93 46.93 46.930000000000000000 46.930000000000000000 +11 10699 33 33.0 73.77 73.77 73.770000000000000000 73.770000000000000000 +11 11490 NULL NULL NULL NULL NULL NULL +11 11991 38 38.0 3.27 3.27 3.270000000000000000 3.270000000000000000 +11 12438 16 16.0 92.94 92.94 92.940000000000000000 92.940000000000000000 +11 15157 96 96.0 15.52 15.52 15.520000000000000000 15.520000000000000000 +11 15649 33 33.0 66.11 66.11 66.110000000000000000 66.110000000000000000 +11 17226 11 11.0 34.03 34.03 34.030000000000000000 34.030000000000000000 +11 17395 85 85.0 38.04 38.04 38.040000000000000000 38.040000000000000000 +12 373 57 57.0 13.95 13.95 13.950000000000000000 13.950000000000000000 +12 1591 82 82.0 45.84 45.84 45.840000000000000000 45.840000000000000000 +12 4888 56 56.0 75.74 75.74 75.740000000000000000 75.740000000000000000 +12 6148 36 36.0 97.62 97.62 97.620000000000000000 97.620000000000000000 +12 6248 36 36.0 75.17 75.17 75.170000000000000000 75.170000000000000000 +12 9616 66 66.0 99.06 99.06 99.060000000000000000 99.060000000000000000 +12 9788 73 73.0 79.42 79.42 79.420000000000000000 79.420000000000000000 +12 13399 46 46.0 45.27 45.27 45.270000000000000000 45.270000000000000000 +12 14746 26 26.0 58.74 58.74 58.740000000000000000 58.740000000000000000 +12 14944 9 9.0 7.33 7.33 7.330000000000000000 7.330000000000000000 +12 15440 99 99.0 27.09 27.09 27.090000000000000000 27.090000000000000000 +13 868 NULL NULL 62.85 62.85 62.850000000000000000 62.850000000000000000 +13 1760 12 12.0 80.96 80.96 80.960000000000000000 80.960000000000000000 +13 1898 NULL NULL 96.46 96.46 96.460000000000000000 
96.460000000000000000 +13 2108 9 9.0 NULL NULL NULL NULL +13 2191 NULL NULL NULL NULL NULL NULL +13 4430 73 73.0 5.86 5.86 5.860000000000000000 5.860000000000000000 +13 5971 80 80.0 72.61 72.61 72.610000000000000000 72.610000000000000000 +13 6085 58 58.0 21.45 21.45 21.450000000000000000 21.450000000000000000 +13 6140 15 15.0 89.9 89.9 89.900000000000000000 89.900000000000000000 +13 6682 80 80.0 32.05 32.05 32.050000000000000000 32.050000000000000000 +13 7640 48 48.0 17.06 17.06 17.060000000000000000 17.060000000000000000 +13 7723 27 27.0 59.09 59.09 59.090000000000000000 59.090000000000000000 +13 10096 12 12.0 17.14 17.14 17.140000000000000000 17.140000000000000000 +13 11758 34 34.0 72.24 72.24 72.240000000000000000 72.240000000000000000 +13 16894 87 87.0 20.99 20.99 20.990000000000000000 20.990000000000000000 +13 17240 20 20.0 93.85 93.85 93.850000000000000000 93.850000000000000000 +14 177 41 41.0 13.05 13.05 13.050000000000000000 13.050000000000000000 +14 769 20 20.0 26.29 26.29 26.290000000000000000 26.290000000000000000 +14 4507 4 4.0 45.45 45.45 45.450000000000000000 45.450000000000000000 +14 10175 19 19.0 39.97 39.97 39.970000000000000000 39.970000000000000000 +14 11549 6 6.0 19.33 19.33 19.330000000000000000 19.330000000000000000 +14 11653 60 60.0 86.94 86.94 86.940000000000000000 86.940000000000000000 +14 11817 81 81.0 60.77 60.77 60.770000000000000000 60.770000000000000000 +14 12587 NULL NULL NULL NULL NULL NULL +14 13069 77 77.0 93.6 93.6 93.600000000000000000 93.600000000000000000 +14 13515 57 57.0 87.32 87.32 87.320000000000000000 87.320000000000000000 +14 13845 17 17.0 52.3 52.3 52.300000000000000000 52.300000000000000000 +14 16741 46 46.0 76.43 76.43 76.430000000000000000 76.430000000000000000 +14 16929 14 14.0 54.76 54.76 54.760000000000000000 54.760000000000000000 +15 4241 21 21.0 89.07 89.07 89.070000000000000000 89.070000000000000000 +15 4505 59 59.0 77.35 77.35 77.350000000000000000 77.350000000000000000 +15 4777 28 28.0 36.86 36.86 36.860000000000000000 36.860000000000000000 +15 7391 98 98.0 53.76 53.76 53.760000000000000000 53.760000000000000000 +15 8336 15 15.0 44.09 44.09 44.090000000000000000 44.090000000000000000 +15 8353 NULL NULL NULL NULL NULL NULL +15 8690 32 32.0 67.37 67.37 67.370000000000000000 67.370000000000000000 +15 8707 21 21.0 48.54 48.54 48.540000000000000000 48.540000000000000000 +15 10361 39 39.0 74.88 74.88 74.880000000000000000 74.880000000000000000 +15 11659 80 80.0 86.23 86.23 86.230000000000000000 86.230000000000000000 +15 13172 25 25.0 47.11 47.11 47.110000000000000000 47.110000000000000000 +15 16619 81 81.0 80.21 80.21 80.210000000000000000 80.210000000000000000 +15 17267 7 7.0 30.61 30.61 30.610000000000000000 30.610000000000000000 +15 17330 82 82.0 67.45 67.45 67.450000000000000000 67.450000000000000000 +15 17564 26 26.0 63.52 63.52 63.520000000000000000 63.520000000000000000 +15 17857 38 38.0 96.35 96.35 96.350000000000000000 96.350000000000000000 +16 457 60 60.0 91.53 91.53 91.530000000000000000 91.530000000000000000 +16 1888 4 4.0 47.64 47.64 47.640000000000000000 47.640000000000000000 +16 4144 94 94.0 19.91 19.91 19.910000000000000000 19.910000000000000000 +16 6008 59 59.0 59.62 59.62 59.620000000000000000 59.620000000000000000 +16 7504 51 51.0 31.35 31.35 31.350000000000000000 31.350000000000000000 +16 8887 35 35.0 59.82 59.82 59.820000000000000000 59.820000000000000000 +16 9769 42 42.0 29.53 29.53 29.530000000000000000 29.530000000000000000 +16 9790 17 17.0 36.95 36.95 36.950000000000000000 36.950000000000000000 +16 9997 94 94.0 
64.76 64.76 64.760000000000000000 64.760000000000000000 +16 11168 86 86.0 62.85 62.85 62.850000000000000000 62.850000000000000000 +16 11920 29 29.0 94.31 94.31 94.310000000000000000 94.310000000000000000 +16 16226 13 13.0 31.3 31.3 31.300000000000000000 31.300000000000000000 +16 17246 70 70.0 80.85 80.85 80.850000000000000000 80.850000000000000000 +17 2092 37 37.0 31.71 31.71 31.710000000000000000 31.710000000000000000 +17 4678 34 34.0 32.47 32.47 32.470000000000000000 32.470000000000000000 +17 6811 70 70.0 62.96 62.96 62.960000000000000000 62.960000000000000000 +17 9214 57 57.0 14.2 14.2 14.200000000000000000 14.200000000000000000 +17 10543 54 54.0 57.11 57.11 57.110000000000000000 57.110000000000000000 +17 11203 21 21.0 93.44 93.44 93.440000000000000000 93.440000000000000000 +17 13177 45 45.0 44.18 44.18 44.180000000000000000 44.180000000000000000 +17 13826 32 32.0 58.61 58.61 58.610000000000000000 58.610000000000000000 +17 15781 76 76.0 24.79 24.79 24.790000000000000000 24.790000000000000000 +17 17683 34 34.0 81.48 81.48 81.480000000000000000 81.480000000000000000 +18 2440 40 40.0 15.39 15.39 15.390000000000000000 15.390000000000000000 +18 5251 41 41.0 45.83 45.83 45.830000000000000000 45.830000000000000000 +18 7378 94 94.0 61.01 61.01 61.010000000000000000 61.010000000000000000 +18 8779 9 9.0 75.19 75.19 75.190000000000000000 75.190000000000000000 +18 8884 18 18.0 43.49 43.49 43.490000000000000000 43.490000000000000000 +18 9886 62 62.0 9.59 9.59 9.590000000000000000 9.590000000000000000 +18 11584 76 76.0 4.26 4.26 4.260000000000000000 4.260000000000000000 +18 11890 7 7.0 82.36 82.36 82.360000000000000000 82.360000000000000000 +18 12602 81 81.0 11.32 11.32 11.320000000000000000 11.320000000000000000 +18 12826 93 93.0 82.82 82.82 82.820000000000000000 82.820000000000000000 +18 12860 18 18.0 19.89 19.89 19.890000000000000000 19.890000000000000000 +18 14011 95 95.0 55.01 55.01 55.010000000000000000 55.010000000000000000 +18 14372 76 76.0 89.58 89.58 89.580000000000000000 89.580000000000000000 +18 14377 15 15.0 15.47 15.47 15.470000000000000000 15.470000000000000000 +18 17995 13 13.0 46.79 46.79 46.790000000000000000 46.790000000000000000 +19 1094 48 48.0 19.55 19.55 19.550000000000000000 19.550000000000000000 +19 3133 96 96.0 68.89 68.89 68.890000000000000000 68.890000000000000000 +19 3376 84 84.0 63.07 63.07 63.070000000000000000 63.070000000000000000 +19 4882 84 84.0 41.48 41.48 41.480000000000000000 41.480000000000000000 +19 6772 97 97.0 36.04 36.04 36.040000000000000000 36.040000000000000000 +19 7087 1 1.0 48.67 48.67 48.670000000000000000 48.670000000000000000 +19 7814 29 29.0 61.78 61.78 61.780000000000000000 61.780000000000000000 +19 8662 97 97.0 72.78 72.78 72.780000000000000000 72.780000000000000000 +19 9094 49 49.0 61.82 61.82 61.820000000000000000 61.820000000000000000 +19 9346 39 39.0 84.06 84.06 84.060000000000000000 84.060000000000000000 +19 10558 82 82.0 12.34 12.34 12.340000000000000000 12.340000000000000000 +19 10651 46 46.0 57.69 57.69 57.690000000000000000 57.690000000000000000 +19 11914 59 59.0 88.03 88.03 88.030000000000000000 88.030000000000000000 +19 16330 NULL NULL 79.15 79.15 79.150000000000000000 79.150000000000000000 +19 17539 20 20.0 69.2 69.2 69.200000000000000000 69.200000000000000000 +20 1451 89 89.0 84.34 84.34 84.340000000000000000 84.340000000000000000 +20 2618 4 4.0 69.47 69.47 69.470000000000000000 69.470000000000000000 +20 5312 9 9.0 29.45 29.45 29.450000000000000000 29.450000000000000000 +20 5425 15 15.0 28.19 28.19 28.190000000000000000 
28.190000000000000000 +20 5483 8 8.0 30.74 30.74 30.740000000000000000 30.740000000000000000 +20 6026 21 21.0 80.56 80.56 80.560000000000000000 80.560000000000000000 +20 7207 90 90.0 83.12 83.12 83.120000000000000000 83.120000000000000000 +20 8714 NULL NULL 8.15 8.15 8.150000000000000000 8.150000000000000000 +20 9086 4 4.0 98.99 98.99 98.990000000000000000 98.990000000000000000 +20 9800 32 32.0 18.09 18.09 18.090000000000000000 18.090000000000000000 +20 13601 17 17.0 1.4 1.4 1.400000000000000000 1.400000000000000000 +20 14935 NULL NULL NULL NULL NULL NULL +20 15131 85 85.0 42.56 42.56 42.560000000000000000 42.560000000000000000 +21 230 48 48.0 13.37 13.37 13.370000000000000000 13.370000000000000000 +21 1810 59 59.0 66.37 66.37 66.370000000000000000 66.370000000000000000 +21 2870 50 50.0 91.94 91.94 91.940000000000000000 91.940000000000000000 +21 5170 45 45.0 90.0 90.0 90.000000000000000000 90.000000000000000000 +21 5998 51 51.0 9.41 9.41 9.410000000000000000 9.410000000000000000 +21 6476 49 49.0 20.29 20.29 20.290000000000000000 20.290000000000000000 +21 9187 14 14.0 35.49 35.49 35.490000000000000000 35.490000000000000000 +21 12266 47 47.0 11.55 11.55 11.550000000000000000 11.550000000000000000 +21 14368 18 18.0 51.29 51.29 51.290000000000000000 51.290000000000000000 +21 14396 88 88.0 45.26 45.26 45.260000000000000000 45.260000000000000000 +22 9985 70 70.0 21.46 21.46 21.460000000000000000 21.460000000000000000 +22 10474 31 31.0 45.65 45.65 45.650000000000000000 45.650000000000000000 +22 11599 66 66.0 5.01 5.01 5.010000000000000000 5.010000000000000000 +22 12415 10 10.0 38.97 38.97 38.970000000000000000 38.970000000000000000 +22 15310 15 15.0 82.24 82.24 82.240000000000000000 82.240000000000000000 +22 16396 85 85.0 86.46 86.46 86.460000000000000000 86.460000000000000000 +22 16922 88 88.0 28.0 28.0 28.000000000000000000 28.000000000000000000 +22 17392 14 14.0 51.86 51.86 51.860000000000000000 51.860000000000000000 +22 17660 70 70.0 95.56 95.56 95.560000000000000000 95.560000000000000000 +23 319 86 86.0 66.36 66.36 66.360000000000000000 66.360000000000000000 +23 7242 37 37.0 54.82 54.82 54.820000000000000000 54.820000000000000000 +23 8181 13 13.0 4.63 4.63 4.630000000000000000 4.630000000000000000 +23 8413 1 1.0 14.2 14.2 14.200000000000000000 14.200000000000000000 +23 9093 38 38.0 80.2 80.2 80.200000000000000000 80.200000000000000000 +23 9097 81 81.0 72.51 72.51 72.510000000000000000 72.510000000000000000 +23 11220 91 91.0 71.3 71.3 71.300000000000000000 71.300000000000000000 +23 11257 64 64.0 29.95 29.95 29.950000000000000000 29.950000000000000000 +23 12397 80 80.0 78.73 78.73 78.730000000000000000 78.730000000000000000 +23 15403 96 96.0 51.96 51.96 51.960000000000000000 51.960000000000000000 +23 17631 16 16.0 22.06 22.06 22.060000000000000000 22.060000000000000000 +24 407 53 53.0 98.05 98.05 98.050000000000000000 98.050000000000000000 +24 1389 72 72.0 60.01 60.01 60.010000000000000000 60.010000000000000000 +24 1795 21 21.0 76.67 76.67 76.670000000000000000 76.670000000000000000 +24 2497 85 85.0 57.93 57.93 57.930000000000000000 57.930000000000000000 +24 3103 73 73.0 44.96 44.96 44.960000000000000000 44.960000000000000000 +24 4425 57 57.0 29.31 29.31 29.310000000000000000 29.310000000000000000 +24 4749 28 28.0 18.17 18.17 18.170000000000000000 18.170000000000000000 +24 4873 41 41.0 40.34 40.34 40.340000000000000000 40.340000000000000000 +24 5653 92 92.0 64.99 64.99 64.990000000000000000 64.990000000000000000 +24 6043 1 1.0 33.41 33.41 33.410000000000000000 33.410000000000000000 +24 6751 82 
82.0 7.48 7.48 7.480000000000000000 7.480000000000000000 +24 7375 97 97.0 78.55 78.55 78.550000000000000000 78.550000000000000000 +24 10265 93 93.0 12.03 12.03 12.030000000000000000 12.030000000000000000 +24 11551 48 48.0 30.8 30.8 30.800000000000000000 30.800000000000000000 +24 13303 97 97.0 94.48 94.48 94.480000000000000000 94.480000000000000000 +24 16483 89 89.0 13.84 13.84 13.840000000000000000 13.840000000000000000 +25 1333 55 55.0 30.82 30.82 30.820000000000000000 30.820000000000000000 +25 2150 100 100.0 67.24 67.24 67.240000000000000000 67.240000000000000000 +25 2608 76 76.0 87.75 87.75 87.750000000000000000 87.750000000000000000 +25 3454 100 100.0 1.61 1.61 1.610000000000000000 1.610000000000000000 +25 4880 29 29.0 15.35 15.35 15.350000000000000000 15.350000000000000000 +25 5954 34 34.0 76.57 76.57 76.570000000000000000 76.570000000000000000 +25 6955 40 40.0 87.12 87.12 87.120000000000000000 87.120000000000000000 +25 7874 65 65.0 2.75 2.75 2.750000000000000000 2.750000000000000000 +25 9472 48 48.0 4.97 4.97 4.970000000000000000 4.970000000000000000 +25 10159 24 24.0 76.64 76.64 76.640000000000000000 76.640000000000000000 +25 14488 26 26.0 68.17 68.17 68.170000000000000000 68.170000000000000000 +25 14635 68 68.0 45.79 45.79 45.790000000000000000 45.790000000000000000 +25 17000 40 40.0 89.34 89.34 89.340000000000000000 89.340000000000000000 +25 17752 55 55.0 11.49 11.49 11.490000000000000000 11.490000000000000000 +26 1989 26 26.0 83.31 83.31 83.310000000000000000 83.310000000000000000 +26 5053 4 4.0 19.63 19.63 19.630000000000000000 19.630000000000000000 +26 5385 97 97.0 51.89 51.89 51.890000000000000000 51.890000000000000000 +26 5721 81 81.0 74.96 74.96 74.960000000000000000 74.960000000000000000 +26 6647 64 64.0 57.04 57.04 57.040000000000000000 57.040000000000000000 +26 7337 45 45.0 37.59 37.59 37.590000000000000000 37.590000000000000000 +26 9679 18 18.0 77.54 77.54 77.540000000000000000 77.540000000000000000 +26 11895 77 77.0 36.85 36.85 36.850000000000000000 36.850000000000000000 +26 12851 56 56.0 14.02 14.02 14.020000000000000000 14.020000000000000000 +26 15039 34 34.0 22.65 22.65 22.650000000000000000 22.650000000000000000 +27 1305 44 44.0 8.35 8.35 8.350000000000000000 8.350000000000000000 +27 2137 96 96.0 3.07 3.07 3.070000000000000000 3.070000000000000000 +27 2671 92 92.0 4.35 4.35 4.350000000000000000 4.350000000000000000 +27 5831 61 61.0 8.79 8.79 8.790000000000000000 8.790000000000000000 +27 7139 59 59.0 6.17 6.17 6.170000000000000000 6.170000000000000000 +27 8167 28 28.0 38.83 38.83 38.830000000000000000 38.830000000000000000 +27 10757 15 15.0 8.7 8.7 8.700000000000000000 8.700000000000000000 +27 11441 15 15.0 14.45 14.45 14.450000000000000000 14.450000000000000000 +27 11509 65 65.0 80.34 80.34 80.340000000000000000 80.340000000000000000 +27 12237 89 89.0 73.9 73.9 73.900000000000000000 73.900000000000000000 +27 12749 31 31.0 80.27 80.27 80.270000000000000000 80.270000000000000000 +27 13885 66 66.0 40.62 40.62 40.620000000000000000 40.620000000000000000 +27 15025 26 26.0 35.56 35.56 35.560000000000000000 35.560000000000000000 +27 16029 59 59.0 2.11 2.11 2.110000000000000000 2.110000000000000000 +27 16419 65 65.0 80.1 80.1 80.100000000000000000 80.100000000000000000 +27 16767 60 60.0 68.33 68.33 68.330000000000000000 68.330000000000000000 +28 1807 98 98.0 78.91 78.91 78.910000000000000000 78.910000000000000000 +28 2817 8 8.0 98.75 98.75 98.750000000000000000 98.750000000000000000 +28 2967 29 29.0 47.87 47.87 47.870000000000000000 47.870000000000000000 +28 4483 78 78.0 
73.9 73.9 73.900000000000000000 73.900000000000000000 +28 5437 15 15.0 7.49 7.49 7.490000000000000000 7.490000000000000000 +28 6411 3 3.0 67.26 67.26 67.260000000000000000 67.260000000000000000 +28 7965 93 93.0 77.74 77.74 77.740000000000000000 77.740000000000000000 +28 8043 58 58.0 60.26 60.26 60.260000000000000000 60.260000000000000000 +28 8407 14 14.0 95.01 95.01 95.010000000000000000 95.010000000000000000 +28 10295 13 13.0 31.83 31.83 31.830000000000000000 31.830000000000000000 +29 20 18 18.0 66.26 66.26 66.260000000000000000 66.260000000000000000 +29 1363 75 75.0 NULL NULL NULL NULL +29 2930 23 23.0 64.78 64.78 64.780000000000000000 64.780000000000000000 +29 3740 5 5.0 90.13 90.13 90.130000000000000000 90.130000000000000000 +29 7654 20 20.0 98.14 98.14 98.140000000000000000 98.140000000000000000 +29 9458 33 33.0 52.33 52.33 52.330000000000000000 52.330000000000000000 +29 10795 33 33.0 68.24 68.24 68.240000000000000000 68.240000000000000000 +29 12068 37 37.0 80.75 80.75 80.750000000000000000 80.750000000000000000 +29 12223 59 59.0 12.89 12.89 12.890000000000000000 12.890000000000000000 +29 13340 21 21.0 40.5 40.5 40.500000000000000000 40.500000000000000000 +29 13693 NULL NULL 95.63 95.63 95.630000000000000000 95.630000000000000000 +29 15085 40 40.0 NULL NULL NULL NULL +29 15626 NULL NULL 17.61 17.61 17.610000000000000000 17.610000000000000000 +29 15782 53 53.0 57.11 57.11 57.110000000000000000 57.110000000000000000 +30 217 91 91.0 52.03 52.03 52.030000000000000000 52.030000000000000000 +30 1951 59 59.0 17.14 17.14 17.140000000000000000 17.140000000000000000 +30 3238 16 16.0 9.84 9.84 9.840000000000000000 9.840000000000000000 +30 3506 15 15.0 16.31 16.31 16.310000000000000000 16.310000000000000000 +30 3928 87 87.0 27.01 27.01 27.010000000000000000 27.010000000000000000 +30 5431 77 77.0 52.37 52.37 52.370000000000000000 52.370000000000000000 +30 6752 69 69.0 40.8 40.8 40.800000000000000000 40.800000000000000000 +30 7870 7 7.0 4.51 4.51 4.510000000000000000 4.510000000000000000 +30 8666 21 21.0 64.0 64.0 64.000000000000000000 64.000000000000000000 +30 12572 33 33.0 61.96 61.96 61.960000000000000000 61.960000000000000000 +30 12670 20 20.0 6.44 6.44 6.440000000000000000 6.440000000000000000 +30 13579 75 75.0 62.71 62.71 62.710000000000000000 62.710000000000000000 +30 14848 62 62.0 64.03 64.03 64.030000000000000000 64.030000000000000000 +30 17348 62 62.0 88.74 88.74 88.740000000000000000 88.740000000000000000 +30 17875 78 78.0 2.91 2.91 2.910000000000000000 2.910000000000000000 +31 913 54 54.0 79.11 79.11 79.110000000000000000 79.110000000000000000 +31 4963 67 67.0 56.37 56.37 56.370000000000000000 56.370000000000000000 +31 6617 11 11.0 86.78 86.78 86.780000000000000000 86.780000000000000000 +31 6917 4 4.0 49.76 49.76 49.760000000000000000 49.760000000000000000 +31 7513 82 82.0 44.95 44.95 44.950000000000000000 44.950000000000000000 +31 11739 95 95.0 6.99 6.99 6.990000000000000000 6.990000000000000000 +31 14575 97 97.0 59.9 59.9 59.900000000000000000 59.900000000000000000 +31 14727 41 41.0 48.1 48.1 48.100000000000000000 48.100000000000000000 +31 15341 31 31.0 16.15 16.15 16.150000000000000000 16.150000000000000000 +31 15411 53 53.0 47.64 47.64 47.640000000000000000 47.640000000000000000 +31 16251 51 51.0 91.49 91.49 91.490000000000000000 91.490000000000000000 +32 1115 61 61.0 97.03 97.03 97.030000000000000000 97.030000000000000000 +32 2095 34 34.0 89.33 89.33 89.330000000000000000 89.330000000000000000 +32 2887 8 8.0 48.71 48.71 48.710000000000000000 48.710000000000000000 +32 4339 6 6.0 
88.27 88.27 88.270000000000000000 88.270000000000000000 +32 4537 22 22.0 65.72 65.72 65.720000000000000000 65.720000000000000000 +32 4808 NULL NULL 57.01 57.01 57.010000000000000000 57.010000000000000000 +32 5798 87 87.0 46.23 46.23 46.230000000000000000 46.230000000000000000 +32 7547 24 24.0 43.33 43.33 43.330000000000000000 43.330000000000000000 +32 9683 26 26.0 NULL NULL NULL NULL +32 11005 46 46.0 51.48 51.48 51.480000000000000000 51.480000000000000000 +32 11348 41 41.0 55.14 55.14 55.140000000000000000 55.140000000000000000 +32 12134 21 21.0 51.01 51.01 51.010000000000000000 51.010000000000000000 +32 15001 57 57.0 30.07 30.07 30.070000000000000000 30.070000000000000000 +32 15644 34 34.0 80.54 80.54 80.540000000000000000 80.540000000000000000 +32 16421 74 74.0 89.89 89.89 89.890000000000000000 89.890000000000000000 +32 17659 51 51.0 23.88 23.88 23.880000000000000000 23.880000000000000000 +33 4798 27 27.0 28.56 28.56 28.560000000000000000 28.560000000000000000 +33 7300 3 3.0 3.13 3.13 3.130000000000000000 3.130000000000000000 +33 9649 36 36.0 18.91 18.91 18.910000000000000000 18.910000000000000000 +33 10376 21 21.0 55.09 55.09 55.090000000000000000 55.090000000000000000 +33 11119 92 92.0 3.49 3.49 3.490000000000000000 3.490000000000000000 +33 11756 26 26.0 58.87 58.87 58.870000000000000000 58.870000000000000000 +33 12643 89 89.0 35.74 35.74 35.740000000000000000 35.740000000000000000 +33 12760 54 54.0 48.97 48.97 48.970000000000000000 48.970000000000000000 +33 12964 80 80.0 83.86 83.86 83.860000000000000000 83.860000000000000000 +33 14125 66 66.0 44.03 44.03 44.030000000000000000 44.030000000000000000 +33 14158 82 82.0 48.07 48.07 48.070000000000000000 48.070000000000000000 +33 14692 93 93.0 56.78 56.78 56.780000000000000000 56.780000000000000000 +33 15478 22 22.0 95.96 95.96 95.960000000000000000 95.960000000000000000 +34 1526 91 91.0 78.12 78.12 78.120000000000000000 78.120000000000000000 +34 1717 53 53.0 99.68 99.68 99.680000000000000000 99.680000000000000000 +34 2312 6 6.0 51.4 51.4 51.400000000000000000 51.400000000000000000 +34 4118 88 88.0 38.38 38.38 38.380000000000000000 38.380000000000000000 +34 5197 63 63.0 13.5 13.5 13.500000000000000000 13.500000000000000000 +34 5449 9 9.0 21.24 21.24 21.240000000000000000 21.240000000000000000 +34 6193 61 61.0 54.55 54.55 54.550000000000000000 54.550000000000000000 +34 9325 3 3.0 92.35 92.35 92.350000000000000000 92.350000000000000000 +34 9766 83 83.0 68.57 68.57 68.570000000000000000 68.570000000000000000 +34 12016 42 42.0 42.44 42.44 42.440000000000000000 42.440000000000000000 +34 12290 53 53.0 88.61 88.61 88.610000000000000000 88.610000000000000000 +34 12512 60 60.0 40.48 40.48 40.480000000000000000 40.480000000000000000 +34 13814 20 20.0 22.82 22.82 22.820000000000000000 22.820000000000000000 +34 16324 30 30.0 37.27 37.27 37.270000000000000000 37.270000000000000000 +35 411 51 51.0 NULL NULL NULL NULL +35 2377 52 52.0 98.03 98.03 98.030000000000000000 98.030000000000000000 +35 3667 97 97.0 59.31 59.31 59.310000000000000000 59.310000000000000000 +35 4325 56 56.0 67.43 67.43 67.430000000000000000 67.430000000000000000 +35 5179 83 83.0 90.54 90.54 90.540000000000000000 90.540000000000000000 +35 11635 87 87.0 92.02 92.02 92.020000000000000000 92.020000000000000000 +35 11661 81 81.0 NULL NULL NULL NULL +35 14239 55 55.0 8.27 8.27 8.270000000000000000 8.270000000000000000 +35 15619 45 45.0 90.28 90.28 90.280000000000000000 90.280000000000000000 +35 15757 9 9.0 14.83 14.83 14.830000000000000000 14.830000000000000000 +35 17341 92 92.0 59.48 
59.48 59.480000000000000000 59.480000000000000000 +35 17365 65 65.0 76.2 76.2 76.200000000000000000 76.200000000000000000 +35 17451 7 7.0 45.66 45.66 45.660000000000000000 45.660000000000000000 +36 1115 80 80.0 11.13 11.13 11.130000000000000000 11.130000000000000000 +36 2095 43 43.0 91.17 91.17 91.170000000000000000 91.170000000000000000 +36 2887 31 31.0 24.53 24.53 24.530000000000000000 24.530000000000000000 +36 7547 46 46.0 8.04 8.04 8.040000000000000000 8.040000000000000000 +36 11005 49 49.0 70.6 70.6 70.600000000000000000 70.600000000000000000 +36 11349 80 80.0 58.17 58.17 58.170000000000000000 58.170000000000000000 +36 15001 54 54.0 16.24 16.24 16.240000000000000000 16.240000000000000000 +36 15645 23 23.0 32.35 32.35 32.350000000000000000 32.350000000000000000 +36 16421 25 25.0 69.67 69.67 69.670000000000000000 69.670000000000000000 +36 17561 16 16.0 82.46 82.46 82.460000000000000000 82.460000000000000000 +36 17659 91 91.0 44.83 44.83 44.830000000000000000 44.830000000000000000 +37 2997 94 94.0 85.67 85.67 85.670000000000000000 85.670000000000000000 +37 7283 87 87.0 54.25 54.25 54.250000000000000000 54.250000000000000000 +37 10715 52 52.0 89.22 89.22 89.220000000000000000 89.220000000000000000 +37 10929 88 88.0 65.45 65.45 65.450000000000000000 65.450000000000000000 +37 13171 6 6.0 84.14 84.14 84.140000000000000000 84.140000000000000000 +37 15337 62 62.0 16.64 16.64 16.640000000000000000 16.640000000000000000 +37 16971 12 12.0 53.97 53.97 53.970000000000000000 53.970000000000000000 +37 17125 NULL NULL NULL NULL NULL NULL +38 757 2 2.0 NULL NULL NULL NULL +38 2164 17 17.0 72.04 72.04 72.040000000000000000 72.040000000000000000 +38 3439 84 84.0 11.71 11.71 11.710000000000000000 11.710000000000000000 +38 4154 35 35.0 10.28 10.28 10.280000000000000000 10.280000000000000000 +38 5113 73 73.0 50.59 50.59 50.590000000000000000 50.590000000000000000 +38 6220 98 98.0 14.54 14.54 14.540000000000000000 14.540000000000000000 +38 7018 15 15.0 69.78 69.78 69.780000000000000000 69.780000000000000000 +38 7784 56 56.0 31.89 31.89 31.890000000000000000 31.890000000000000000 +38 8870 15 15.0 46.69 46.69 46.690000000000000000 46.690000000000000000 +38 9710 7 7.0 82.77 82.77 82.770000000000000000 82.770000000000000000 +38 10441 62 62.0 80.37 80.37 80.370000000000000000 80.370000000000000000 +38 15698 57 57.0 11.4 11.4 11.400000000000000000 11.400000000000000000 +39 386 89 89.0 28.08 28.08 28.080000000000000000 28.080000000000000000 +39 1598 64 64.0 44.63 44.63 44.630000000000000000 44.630000000000000000 +39 3476 73 73.0 80.57 80.57 80.570000000000000000 80.570000000000000000 +39 3943 64 64.0 59.68 59.68 59.680000000000000000 59.680000000000000000 +39 4190 86 86.0 35.56 35.56 35.560000000000000000 35.560000000000000000 +39 4957 24 24.0 16.1 16.1 16.100000000000000000 16.100000000000000000 +39 5393 98 98.0 58.75 58.75 58.750000000000000000 58.750000000000000000 +39 7097 78 78.0 33.1 33.1 33.100000000000000000 33.100000000000000000 +39 7118 67 67.0 68.99 68.99 68.990000000000000000 68.990000000000000000 +39 7604 49 49.0 46.49 46.49 46.490000000000000000 46.490000000000000000 +39 7697 24 24.0 44.89 44.89 44.890000000000000000 44.890000000000000000 +39 8078 54 54.0 73.6 73.6 73.600000000000000000 73.600000000000000000 +39 8411 96 96.0 35.69 35.69 35.690000000000000000 35.690000000000000000 +39 15491 54 54.0 3.2 3.2 3.200000000000000000 3.200000000000000000 +39 15625 17 17.0 96.62 96.62 96.620000000000000000 96.620000000000000000 +40 2854 71 71.0 10.62 10.62 10.620000000000000000 10.620000000000000000 +40 3490 
65 65.0 41.24 41.24 41.240000000000000000 41.240000000000000000 +40 3985 63 63.0 22.94 22.94 22.940000000000000000 22.940000000000000000 +40 5098 35 35.0 33.91 33.91 33.910000000000000000 33.910000000000000000 +40 5318 87 87.0 32.66 32.66 32.660000000000000000 32.660000000000000000 +40 10094 80 80.0 8.63 8.63 8.630000000000000000 8.630000000000000000 +40 10912 23 23.0 2.46 2.46 2.460000000000000000 2.460000000000000000 +40 12050 NULL NULL 38.12 38.12 38.120000000000000000 38.120000000000000000 +40 13658 53 53.0 56.42 56.42 56.420000000000000000 56.420000000000000000 +40 16976 3 3.0 20.7 20.7 20.700000000000000000 20.700000000000000000 +41 10 50 50.0 54.36 54.36 54.360000000000000000 54.360000000000000000 +41 64 29 29.0 27.18 27.18 27.180000000000000000 27.180000000000000000 +41 3380 88 88.0 14.11 14.11 14.110000000000000000 14.110000000000000000 +41 5566 11 11.0 50.45 50.45 50.450000000000000000 50.450000000000000000 +41 6310 90 90.0 60.1 60.1 60.100000000000000000 60.100000000000000000 +41 7402 69 69.0 57.23 57.23 57.230000000000000000 57.230000000000000000 +41 7603 94 94.0 6.12 6.12 6.120000000000000000 6.120000000000000000 +41 9322 8 8.0 59.4 59.4 59.400000000000000000 59.400000000000000000 +41 10915 81 81.0 91.63 91.63 91.630000000000000000 91.630000000000000000 +41 14788 15 15.0 90.04 90.04 90.040000000000000000 90.040000000000000000 +41 15242 87 87.0 48.25 48.25 48.250000000000000000 48.250000000000000000 +41 15328 46 46.0 84.03 84.03 84.030000000000000000 84.030000000000000000 +41 16514 20 20.0 5.05 5.05 5.050000000000000000 5.050000000000000000 +42 619 69 69.0 56.85 56.85 56.850000000000000000 56.850000000000000000 +42 976 100 100.0 12.59 12.59 12.590000000000000000 12.590000000000000000 +42 1436 94 94.0 54.21 54.21 54.210000000000000000 54.210000000000000000 +42 2314 74 74.0 24.46 24.46 24.460000000000000000 24.460000000000000000 +42 2392 14 14.0 49.48 49.48 49.480000000000000000 49.480000000000000000 +42 2602 30 30.0 55.77 55.77 55.770000000000000000 55.770000000000000000 +42 3346 74 74.0 29.72 29.72 29.720000000000000000 29.720000000000000000 +42 3613 30 30.0 56.33 56.33 56.330000000000000000 56.330000000000000000 +42 6058 30 30.0 81.1 81.1 81.100000000000000000 81.100000000000000000 +42 6134 92 92.0 18.91 18.91 18.910000000000000000 18.910000000000000000 +42 8462 23 23.0 27.88 27.88 27.880000000000000000 27.880000000000000000 +42 9740 52 52.0 52.46 52.46 52.460000000000000000 52.460000000000000000 +42 10016 57 57.0 12.47 12.47 12.470000000000000000 12.470000000000000000 +42 10471 19 19.0 42.67 42.67 42.670000000000000000 42.670000000000000000 +42 12550 41 41.0 17.09 17.09 17.090000000000000000 17.090000000000000000 +42 15002 41 41.0 58.33 58.33 58.330000000000000000 58.330000000000000000 +43 2923 16 16.0 82.12 82.12 82.120000000000000000 82.120000000000000000 +43 3344 22 22.0 88.77 88.77 88.770000000000000000 88.770000000000000000 +43 3911 26 26.0 21.75 21.75 21.750000000000000000 21.750000000000000000 +43 4364 77 77.0 82.92 82.92 82.920000000000000000 82.920000000000000000 +43 4691 41 41.0 2.24 2.24 2.240000000000000000 2.240000000000000000 +43 5773 85 85.0 66.42 66.42 66.420000000000000000 66.420000000000000000 +43 5852 16 16.0 81.99 81.99 81.990000000000000000 81.990000000000000000 +43 11771 30 30.0 41.13 41.13 41.130000000000000000 41.130000000000000000 +43 14669 97 97.0 52.94 52.94 52.940000000000000000 52.940000000000000000 +44 2351 56 56.0 55.53 55.53 55.530000000000000000 55.530000000000000000 +44 2623 18 18.0 39.17 39.17 39.170000000000000000 39.170000000000000000 +44 
7303 14 14.0 36.13 36.13 36.130000000000000000 36.130000000000000000 +44 7527 67 67.0 90.05 90.05 90.050000000000000000 90.050000000000000000 +44 9059 68 68.0 30.11 30.11 30.110000000000000000 30.110000000000000000 +44 11707 83 83.0 85.49 85.49 85.490000000000000000 85.490000000000000000 +44 12341 20 20.0 82.28 82.28 82.280000000000000000 82.280000000000000000 +44 13331 98 98.0 3.53 3.53 3.530000000000000000 3.530000000000000000 +44 13449 45 45.0 50.83 50.83 50.830000000000000000 50.830000000000000000 +44 14149 80 80.0 18.83 18.83 18.830000000000000000 18.830000000000000000 +44 15803 81 81.0 43.81 43.81 43.810000000000000000 43.810000000000000000 +44 16491 56 56.0 32.28 32.28 32.280000000000000000 32.280000000000000000 +44 16837 92 92.0 30.11 30.11 30.110000000000000000 30.110000000000000000 +44 16909 61 61.0 92.15 92.15 92.150000000000000000 92.150000000000000000 +45 811 62 62.0 23.41 23.41 23.410000000000000000 23.410000000000000000 +45 1479 49 49.0 5.01 5.01 5.010000000000000000 5.010000000000000000 +45 3265 98 98.0 27.12 27.12 27.120000000000000000 27.120000000000000000 +45 5309 18 18.0 51.16 51.16 51.160000000000000000 51.160000000000000000 +45 7363 87 87.0 85.95 85.95 85.950000000000000000 85.950000000000000000 +45 10115 68 68.0 38.09 38.09 38.090000000000000000 38.090000000000000000 +45 11095 40 40.0 52.97 52.97 52.970000000000000000 52.970000000000000000 +45 13133 46 46.0 85.87 85.87 85.870000000000000000 85.870000000000000000 +45 16349 6 6.0 94.59 94.59 94.590000000000000000 94.590000000000000000 +46 1960 12 12.0 53.47 53.47 53.470000000000000000 53.470000000000000000 +46 3010 67 67.0 66.87 66.87 66.870000000000000000 66.870000000000000000 +46 7040 33 33.0 90.87 90.87 90.870000000000000000 90.870000000000000000 +46 8065 NULL NULL 43.04 43.04 43.040000000000000000 43.040000000000000000 +46 11426 72 72.0 53.81 53.81 53.810000000000000000 53.810000000000000000 +46 13042 58 58.0 41.38 41.38 41.380000000000000000 41.380000000000000000 +46 15595 32 32.0 29.12 29.12 29.120000000000000000 29.120000000000000000 +46 16540 30 30.0 54.36 54.36 54.360000000000000000 54.360000000000000000 +46 17150 57 57.0 71.68 71.68 71.680000000000000000 71.680000000000000000 +46 17384 13 13.0 93.68 93.68 93.680000000000000000 93.680000000000000000 +47 254 NULL NULL NULL NULL NULL NULL +47 481 30 30.0 36.51 36.51 36.510000000000000000 36.510000000000000000 +47 1132 66 66.0 53.46 53.46 53.460000000000000000 53.460000000000000000 +47 1916 71 71.0 47.62 47.62 47.620000000000000000 47.620000000000000000 +47 3085 51 51.0 63.55 63.55 63.550000000000000000 63.550000000000000000 +47 3202 7 7.0 26.06 26.06 26.060000000000000000 26.060000000000000000 +47 3878 NULL NULL NULL NULL NULL NULL +47 4774 11 11.0 63.71 63.71 63.710000000000000000 63.710000000000000000 +47 5008 82 82.0 1.76 1.76 1.760000000000000000 1.760000000000000000 +47 5305 NULL NULL 84.7 84.7 84.700000000000000000 84.700000000000000000 +47 5468 7 7.0 5.03 5.03 5.030000000000000000 5.030000000000000000 +47 7214 1 1.0 12.8 12.8 12.800000000000000000 12.800000000000000000 +47 9770 33 33.0 69.12 69.12 69.120000000000000000 69.120000000000000000 +47 13246 47 47.0 11.71 11.71 11.710000000000000000 11.710000000000000000 +47 13477 10 10.0 78.83 78.83 78.830000000000000000 78.830000000000000000 +48 1761 22 22.0 55.73 55.73 55.730000000000000000 55.730000000000000000 +48 2820 4 4.0 6.46 6.46 6.460000000000000000 6.460000000000000000 +48 2829 65 65.0 22.1 22.1 22.100000000000000000 22.100000000000000000 +48 4431 39 39.0 97.07 97.07 97.070000000000000000 
97.070000000000000000 +48 5971 29 29.0 40.46 40.46 40.460000000000000000 40.460000000000000000 +48 6085 1 1.0 58.13 58.13 58.130000000000000000 58.130000000000000000 +48 6684 44 44.0 20.22 20.22 20.220000000000000000 20.220000000000000000 +48 9199 88 88.0 37.89 37.89 37.890000000000000000 37.890000000000000000 +48 11259 NULL NULL NULL NULL NULL NULL +48 12468 62 62.0 43.72 43.72 43.720000000000000000 43.720000000000000000 +48 13153 74 74.0 34.26 34.26 34.260000000000000000 34.260000000000000000 +48 17799 17 17.0 80.36 80.36 80.360000000000000000 80.360000000000000000 +49 749 60 60.0 42.11 42.11 42.110000000000000000 42.110000000000000000 +49 2135 4 4.0 15.8 15.8 15.800000000000000000 15.800000000000000000 +49 5342 69 69.0 46.41 46.41 46.410000000000000000 46.410000000000000000 +49 5852 47 47.0 74.9 74.9 74.900000000000000000 74.900000000000000000 +49 6805 40 40.0 12.9 12.9 12.900000000000000000 12.900000000000000000 +49 7141 94 94.0 50.5 50.5 50.500000000000000000 50.500000000000000000 +49 9049 68 68.0 75.38 75.38 75.380000000000000000 75.380000000000000000 +49 9553 71 71.0 29.28 29.28 29.280000000000000000 29.280000000000000000 +49 12737 48 48.0 2.17 2.17 2.170000000000000000 2.170000000000000000 +49 15155 84 84.0 4.4 4.4 4.400000000000000000 4.400000000000000000 +49 16361 4 4.0 79.85 79.85 79.850000000000000000 79.850000000000000000 +50 1280 69 69.0 8.66 8.66 8.660000000000000000 8.660000000000000000 +50 1312 30 30.0 25.84 25.84 25.840000000000000000 25.840000000000000000 +50 1909 53 53.0 56.01 56.01 56.010000000000000000 56.010000000000000000 +50 1984 40 40.0 8.81 8.81 8.810000000000000000 8.810000000000000000 +50 3097 64 64.0 33.17 33.17 33.170000000000000000 33.170000000000000000 +50 5023 NULL NULL 16.24 16.24 16.240000000000000000 16.240000000000000000 +50 7135 69 69.0 12.68 12.68 12.680000000000000000 12.680000000000000000 +50 16081 82 82.0 99.55 99.55 99.550000000000000000 99.550000000000000000 +51 422 21 21.0 69.89 69.89 69.890000000000000000 69.890000000000000000 +51 3091 28 28.0 92.87 92.87 92.870000000000000000 92.870000000000000000 +51 4687 6 6.0 93.02 93.02 93.020000000000000000 93.020000000000000000 +51 5029 12 12.0 34.53 34.53 34.530000000000000000 34.530000000000000000 +51 5059 51 51.0 48.54 48.54 48.540000000000000000 48.540000000000000000 +51 6565 33 33.0 32.44 32.44 32.440000000000000000 32.440000000000000000 +51 8384 79 79.0 15.35 15.35 15.350000000000000000 15.350000000000000000 +51 9311 90 90.0 39.48 39.48 39.480000000000000000 39.480000000000000000 +51 10133 54 54.0 46.71 46.71 46.710000000000000000 46.710000000000000000 +51 11234 NULL NULL NULL NULL NULL NULL +51 12625 53 53.0 97.27 97.27 97.270000000000000000 97.270000000000000000 +51 13199 97 97.0 99.32 99.32 99.320000000000000000 99.320000000000000000 +51 17483 22 22.0 31.99 31.99 31.990000000000000000 31.990000000000000000 +51 17705 66 66.0 46.11 46.11 46.110000000000000000 46.110000000000000000 +52 2420 90 90.0 22.31 22.31 22.310000000000000000 22.310000000000000000 +52 3334 73 73.0 29.2 29.2 29.200000000000000000 29.200000000000000000 +52 6098 NULL NULL 4.83 4.83 4.830000000000000000 4.830000000000000000 +52 7606 45 45.0 42.51 42.51 42.510000000000000000 42.510000000000000000 +52 11488 76 76.0 78.68 78.68 78.680000000000000000 78.680000000000000000 +52 15649 29 29.0 22.86 22.86 22.860000000000000000 22.860000000000000000 +52 16646 48 48.0 95.82 95.82 95.820000000000000000 95.820000000000000000 +52 17402 91 91.0 81.94 81.94 81.940000000000000000 81.940000000000000000 +52 17456 37 37.0 7.93 7.93 
7.930000000000000000 7.930000000000000000 +53 1114 40 40.0 28.34 28.34 28.340000000000000000 28.340000000000000000 +53 2095 62 62.0 23.98 23.98 23.980000000000000000 23.980000000000000000 +53 2786 70 70.0 76.55 76.55 76.550000000000000000 76.550000000000000000 +53 2887 39 39.0 66.68 66.68 66.680000000000000000 66.680000000000000000 +53 7546 58 58.0 73.79 73.79 73.790000000000000000 73.790000000000000000 +53 11348 38 38.0 5.54 5.54 5.540000000000000000 5.540000000000000000 +53 13220 76 76.0 27.93 27.93 27.930000000000000000 27.930000000000000000 +53 13795 38 38.0 93.96 93.96 93.960000000000000000 93.960000000000000000 +53 15991 37 37.0 77.75 77.75 77.750000000000000000 77.750000000000000000 +53 16420 14 14.0 36.72 36.72 36.720000000000000000 36.720000000000000000 +53 16648 79 79.0 55.29 55.29 55.290000000000000000 55.290000000000000000 +53 17296 43 43.0 21.4 21.4 21.400000000000000000 21.400000000000000000 +53 17560 15 15.0 46.39 46.39 46.390000000000000000 46.390000000000000000 +54 702 40 40.0 16.76 16.76 16.760000000000000000 16.760000000000000000 +54 825 50 50.0 99.64 99.64 99.640000000000000000 99.640000000000000000 +54 1165 62 62.0 69.84 69.84 69.840000000000000000 69.840000000000000000 +54 3861 NULL NULL NULL NULL NULL NULL +54 6517 40 40.0 23.38 23.38 23.380000000000000000 23.380000000000000000 +54 9159 75 75.0 55.47 55.47 55.470000000000000000 55.470000000000000000 +54 14737 38 38.0 29.2 29.2 29.200000000000000000 29.200000000000000000 +54 16059 15 15.0 7.9 7.9 7.900000000000000000 7.900000000000000000 +54 16974 NULL NULL NULL NULL NULL NULL +54 17479 34 34.0 94.14 94.14 94.140000000000000000 94.140000000000000000 +55 1339 16 16.0 71.32 71.32 71.320000000000000000 71.320000000000000000 +55 3001 7 7.0 57.58 57.58 57.580000000000000000 57.580000000000000000 +55 5137 33 33.0 57.28 57.28 57.280000000000000000 57.280000000000000000 +55 9703 44 44.0 57.21 57.21 57.210000000000000000 57.210000000000000000 +55 12170 92 92.0 69.53 69.53 69.530000000000000000 69.530000000000000000 +55 12205 90 90.0 56.92 56.92 56.920000000000000000 56.920000000000000000 +55 14135 36 36.0 26.4 26.4 26.400000000000000000 26.400000000000000000 +55 14923 71 71.0 30.04 30.04 30.040000000000000000 30.040000000000000000 +55 17677 17 17.0 26.59 26.59 26.590000000000000000 26.590000000000000000 +56 4242 2 2.0 88.74 88.74 88.740000000000000000 88.740000000000000000 +56 4506 57 57.0 69.45 69.45 69.450000000000000000 69.450000000000000000 +56 8353 35 35.0 80.42 80.42 80.420000000000000000 80.420000000000000000 +56 8691 59 59.0 98.91 98.91 98.910000000000000000 98.910000000000000000 +56 8707 68 68.0 79.7 79.7 79.700000000000000000 79.700000000000000000 +56 10362 54 54.0 82.62 82.62 82.620000000000000000 82.620000000000000000 +56 16620 23 23.0 9.94 9.94 9.940000000000000000 9.940000000000000000 +56 17331 74 74.0 32.12 32.12 32.120000000000000000 32.120000000000000000 +57 3253 71 71.0 91.02 91.02 91.020000000000000000 91.020000000000000000 +57 4028 88 88.0 82.23 82.23 82.230000000000000000 82.230000000000000000 +57 4933 22 22.0 93.86 93.86 93.860000000000000000 93.860000000000000000 +57 12596 91 91.0 36.67 36.67 36.670000000000000000 36.670000000000000000 +57 12721 62 62.0 76.4 76.4 76.400000000000000000 76.400000000000000000 +57 12740 52 52.0 55.58 55.58 55.580000000000000000 55.580000000000000000 +57 15182 86 86.0 84.85 84.85 84.850000000000000000 84.850000000000000000 +57 17729 26 26.0 97.2 97.2 97.200000000000000000 97.200000000000000000 +57 17993 99 99.0 NULL NULL NULL NULL +58 1829 52 52.0 19.97 19.97 
19.970000000000000000 19.970000000000000000 +58 3848 6 6.0 45.41 45.41 45.410000000000000000 45.410000000000000000 +58 5117 2 2.0 56.01 56.01 56.010000000000000000 56.010000000000000000 +58 7649 19 19.0 44.04 44.04 44.040000000000000000 44.040000000000000000 +58 9743 62 62.0 73.14 73.14 73.140000000000000000 73.140000000000000000 +58 10802 14 14.0 79.64 79.64 79.640000000000000000 79.640000000000000000 +58 15635 6 6.0 82.45 82.45 82.450000000000000000 82.450000000000000000 +58 16472 6 6.0 7.58 7.58 7.580000000000000000 7.580000000000000000 +58 16949 35 35.0 25.76 25.76 25.760000000000000000 25.760000000000000000 +59 3133 92 92.0 14.57 14.57 14.570000000000000000 14.570000000000000000 +59 3546 22 22.0 64.21 64.21 64.210000000000000000 64.210000000000000000 +59 5772 70 70.0 56.19 56.19 56.190000000000000000 56.190000000000000000 +59 7087 80 80.0 58.71 58.71 58.710000000000000000 58.710000000000000000 +59 8010 46 46.0 20.15 20.15 20.150000000000000000 20.150000000000000000 +59 8335 36 36.0 32.82 32.82 32.820000000000000000 32.820000000000000000 +59 9348 62 62.0 83.62 83.62 83.620000000000000000 83.620000000000000000 +59 9397 92 92.0 70.69 70.69 70.690000000000000000 70.690000000000000000 +59 10651 100 100.0 35.78 35.78 35.780000000000000000 35.780000000000000000 +59 11916 19 19.0 34.31 34.31 34.310000000000000000 34.310000000000000000 +59 12858 90 90.0 61.18 61.18 61.180000000000000000 61.180000000000000000 +59 14529 44 44.0 42.76 42.76 42.760000000000000000 42.760000000000000000 +60 97 50 50.0 37.49 37.49 37.490000000000000000 37.490000000000000000 +60 555 62 62.0 49.17 49.17 49.170000000000000000 49.170000000000000000 +60 633 71 71.0 96.74 96.74 96.740000000000000000 96.740000000000000000 +60 999 43 43.0 22.13 22.13 22.130000000000000000 22.130000000000000000 +60 1117 78 78.0 46.63 46.63 46.630000000000000000 46.630000000000000000 +60 1573 90 90.0 19.02 19.02 19.020000000000000000 19.020000000000000000 +60 4041 25 25.0 36.26 36.26 36.260000000000000000 36.260000000000000000 +60 4235 28 28.0 29.67 29.67 29.670000000000000000 29.670000000000000000 +60 4513 72 72.0 79.56 79.56 79.560000000000000000 79.560000000000000000 +60 4937 22 22.0 27.75 27.75 27.750000000000000000 27.750000000000000000 +60 7231 95 95.0 45.42 45.42 45.420000000000000000 45.420000000000000000 +60 10277 62 62.0 28.05 28.05 28.050000000000000000 28.050000000000000000 +60 10393 75 75.0 98.86 98.86 98.860000000000000000 98.860000000000000000 +60 13975 14 14.0 76.01 76.01 76.010000000000000000 76.010000000000000000 +60 16887 25 25.0 17.92 17.92 17.920000000000000000 17.920000000000000000 +60 17755 88 88.0 52.17 52.17 52.170000000000000000 52.170000000000000000 +61 1106 4 4.0 78.21 78.21 78.210000000000000000 78.210000000000000000 +61 2264 36 36.0 60.94 60.94 60.940000000000000000 60.940000000000000000 +61 3362 48 48.0 67.92 67.92 67.920000000000000000 67.920000000000000000 +61 4567 26 26.0 29.6 29.6 29.600000000000000000 29.600000000000000000 +61 5528 78 78.0 13.85 13.85 13.850000000000000000 13.850000000000000000 +61 6380 77 77.0 69.52 69.52 69.520000000000000000 69.520000000000000000 +61 7591 78 78.0 91.99 91.99 91.990000000000000000 91.990000000000000000 +61 8924 11 11.0 86.51 86.51 86.510000000000000000 86.510000000000000000 +61 10330 8 8.0 46.45 46.45 46.450000000000000000 46.450000000000000000 +61 16462 26 26.0 24.34 24.34 24.340000000000000000 24.340000000000000000 +62 4093 94 94.0 5.53 5.53 5.530000000000000000 5.530000000000000000 +62 6403 NULL NULL 92.02 92.02 92.020000000000000000 92.020000000000000000 +62 8457 37 
37.0 99.97 99.97 99.970000000000000000 99.970000000000000000 +62 10149 75 75.0 48.36 48.36 48.360000000000000000 48.360000000000000000 +62 12163 29 29.0 16.7 16.7 16.700000000000000000 16.700000000000000000 +62 12199 5 5.0 85.54 85.54 85.540000000000000000 85.540000000000000000 +62 12407 NULL NULL NULL NULL NULL NULL +62 13559 80 80.0 52.56 52.56 52.560000000000000000 52.560000000000000000 +62 15399 74 74.0 71.7 71.7 71.700000000000000000 71.700000000000000000 +62 15733 40 40.0 28.03 28.03 28.030000000000000000 28.030000000000000000 +62 16151 93 93.0 84.72 84.72 84.720000000000000000 84.720000000000000000 +63 4488 73 73.0 22.85 22.85 22.850000000000000000 22.850000000000000000 +63 5079 79 79.0 36.05 36.05 36.050000000000000000 36.050000000000000000 +63 5217 66 66.0 15.71 15.71 15.710000000000000000 15.710000000000000000 +63 5658 99 99.0 88.78 88.78 88.780000000000000000 88.780000000000000000 +63 9319 80 80.0 9.27 9.27 9.270000000000000000 9.270000000000000000 +63 11370 38 38.0 56.43 56.43 56.430000000000000000 56.430000000000000000 +63 11946 85 85.0 94.28 94.28 94.280000000000000000 94.280000000000000000 +63 13339 19 19.0 19.44 19.44 19.440000000000000000 19.440000000000000000 +63 15793 40 40.0 75.62 75.62 75.620000000000000000 75.620000000000000000 +63 16569 69 69.0 NULL NULL NULL NULL +64 1213 NULL NULL 38.46 38.46 38.460000000000000000 38.460000000000000000 +64 3090 87 87.0 78.06 78.06 78.060000000000000000 78.060000000000000000 +64 3963 NULL NULL NULL NULL NULL NULL +64 11835 82 82.0 30.65 30.65 30.650000000000000000 30.650000000000000000 +64 13224 NULL NULL NULL NULL NULL NULL +64 14407 8 8.0 44.36 44.36 44.360000000000000000 44.360000000000000000 +64 15867 59 59.0 43.77 43.77 43.770000000000000000 43.770000000000000000 +64 15936 30 30.0 56.24 56.24 56.240000000000000000 56.240000000000000000 +64 16921 19 19.0 98.61 98.61 98.610000000000000000 98.610000000000000000 +64 17586 78 78.0 77.26 77.26 77.260000000000000000 77.260000000000000000 +64 17617 17 17.0 91.67 91.67 91.670000000000000000 91.670000000000000000 +65 2287 100 100.0 91.8 91.8 91.800000000000000000 91.800000000000000000 +65 4227 42 42.0 45.38 45.38 45.380000000000000000 45.380000000000000000 +65 9625 51 51.0 40.95 40.95 40.950000000000000000 40.950000000000000000 +65 9847 54 54.0 64.26 64.26 64.260000000000000000 64.260000000000000000 +65 13897 40 40.0 52.84 52.84 52.840000000000000000 52.840000000000000000 +65 14905 85 85.0 81.24 81.24 81.240000000000000000 81.240000000000000000 +65 15177 55 55.0 89.19 89.19 89.190000000000000000 89.190000000000000000 +65 17025 67 67.0 25.52 25.52 25.520000000000000000 25.520000000000000000 +66 6507 76 76.0 43.81 43.81 43.810000000000000000 43.810000000000000000 +66 7033 65 65.0 4.08 4.08 4.080000000000000000 4.080000000000000000 +66 7227 66 66.0 92.15 92.15 92.150000000000000000 92.150000000000000000 +66 8197 41 41.0 84.22 84.22 84.220000000000000000 84.220000000000000000 +66 9237 29 29.0 76.94 76.94 76.940000000000000000 76.940000000000000000 +66 10019 10 10.0 48.77 48.77 48.770000000000000000 48.770000000000000000 +66 11419 66 66.0 10.12 10.12 10.120000000000000000 10.120000000000000000 +66 15629 20 20.0 22.04 22.04 22.040000000000000000 22.040000000000000000 +66 16745 91 91.0 9.53 9.53 9.530000000000000000 9.530000000000000000 +66 16795 28 28.0 42.0 42.0 42.000000000000000000 42.000000000000000000 +67 757 77 77.0 94.12 94.12 94.120000000000000000 94.120000000000000000 +67 2133 74 74.0 71.99 71.99 71.990000000000000000 71.990000000000000000 +67 3439 73 73.0 23.52 23.52 
23.520000000000000000 23.520000000000000000 +67 4155 87 87.0 87.74 87.74 87.740000000000000000 87.740000000000000000 +67 5113 NULL NULL 49.59 49.59 49.590000000000000000 49.590000000000000000 +67 7020 79 79.0 97.01 97.01 97.010000000000000000 97.010000000000000000 +67 7507 77 77.0 26.78 26.78 26.780000000000000000 26.780000000000000000 +67 8469 59 59.0 NULL NULL NULL NULL +67 8871 71 71.0 78.59 78.59 78.590000000000000000 78.590000000000000000 +67 12087 70 70.0 80.71 80.71 80.710000000000000000 80.710000000000000000 +67 15699 44 44.0 34.59 34.59 34.590000000000000000 34.590000000000000000 +68 1387 74 74.0 90.2 90.2 90.200000000000000000 90.200000000000000000 +68 1603 57 57.0 21.03 21.03 21.030000000000000000 21.030000000000000000 +68 1820 54 54.0 55.82 55.82 55.820000000000000000 55.820000000000000000 +68 2035 22 22.0 54.35 54.35 54.350000000000000000 54.350000000000000000 +68 2296 52 52.0 98.9 98.9 98.900000000000000000 98.900000000000000000 +68 2564 83 83.0 77.32 77.32 77.320000000000000000 77.320000000000000000 +68 5162 23 23.0 83.48 83.48 83.480000000000000000 83.480000000000000000 +68 6763 77 77.0 96.29 96.29 96.290000000000000000 96.290000000000000000 +68 7765 NULL NULL 69.58 69.58 69.580000000000000000 69.580000000000000000 +68 12526 3 3.0 13.06 13.06 13.060000000000000000 13.060000000000000000 +68 12724 88 88.0 9.63 9.63 9.630000000000000000 9.630000000000000000 +68 17426 2 2.0 48.36 48.36 48.360000000000000000 48.360000000000000000 +68 17600 13 13.0 52.66 52.66 52.660000000000000000 52.660000000000000000 +69 322 45 45.0 NULL NULL NULL NULL +69 337 34 34.0 20.99 20.99 20.990000000000000000 20.990000000000000000 +69 4208 9 9.0 99.77 99.77 99.770000000000000000 99.770000000000000000 +69 4267 10 10.0 72.37 72.37 72.370000000000000000 72.370000000000000000 +69 6136 7 7.0 49.79 49.79 49.790000000000000000 49.790000000000000000 +69 7264 67 67.0 78.29 78.29 78.290000000000000000 78.290000000000000000 +69 7822 30 30.0 78.1 78.1 78.100000000000000000 78.100000000000000000 +69 8599 53 53.0 56.42 56.42 56.420000000000000000 56.420000000000000000 +69 11137 68 68.0 22.04 22.04 22.040000000000000000 22.040000000000000000 +69 13489 66 66.0 2.68 2.68 2.680000000000000000 2.680000000000000000 +69 13792 NULL NULL 85.64 85.64 85.640000000000000000 85.640000000000000000 +69 15448 16 16.0 94.38 94.38 94.380000000000000000 94.380000000000000000 +70 1592 53 53.0 99.59 99.59 99.590000000000000000 99.590000000000000000 +70 2462 NULL NULL 92.7 92.7 92.700000000000000000 92.700000000000000000 +70 3296 48 48.0 10.23 10.23 10.230000000000000000 10.230000000000000000 +70 3947 NULL NULL 63.8 63.8 63.800000000000000000 63.800000000000000000 +70 6185 82 82.0 84.6 84.6 84.600000000000000000 84.600000000000000000 +70 6425 NULL NULL NULL NULL NULL NULL +70 8893 17 17.0 63.51 63.51 63.510000000000000000 63.510000000000000000 +70 9857 20 20.0 54.96 54.96 54.960000000000000000 54.960000000000000000 +70 14549 4 4.0 35.39 35.39 35.390000000000000000 35.390000000000000000 +70 17815 95 95.0 36.89 36.89 36.890000000000000000 36.890000000000000000 +71 457 75 75.0 27.02 27.02 27.020000000000000000 27.020000000000000000 +71 1888 4 4.0 40.47 40.47 40.470000000000000000 40.470000000000000000 +71 2098 51 51.0 57.87 57.87 57.870000000000000000 57.870000000000000000 +71 4144 49 49.0 26.75 26.75 26.750000000000000000 26.750000000000000000 +71 5858 NULL NULL NULL NULL NULL NULL +71 6008 54 54.0 38.98 38.98 38.980000000000000000 38.980000000000000000 +71 7504 3 3.0 78.44 78.44 78.440000000000000000 78.440000000000000000 +71 8887 10 
10.0 61.4 61.4 61.400000000000000000 61.400000000000000000 +71 9274 36 36.0 12.39 12.39 12.390000000000000000 12.390000000000000000 +71 9769 79 79.0 52.15 52.15 52.150000000000000000 52.150000000000000000 +71 9790 96 96.0 37.78 37.78 37.780000000000000000 37.780000000000000000 +71 9997 26 26.0 53.28 53.28 53.280000000000000000 53.280000000000000000 +71 10108 66 66.0 9.49 9.49 9.490000000000000000 9.490000000000000000 +71 10288 30 30.0 29.57 29.57 29.570000000000000000 29.570000000000000000 +71 11168 79 79.0 24.66 24.66 24.660000000000000000 24.660000000000000000 +71 17246 90 90.0 50.57 50.57 50.570000000000000000 50.570000000000000000 +72 1535 9 9.0 69.06 69.06 69.060000000000000000 69.060000000000000000 +72 5917 85 85.0 NULL NULL NULL NULL +72 6113 45 45.0 59.65 59.65 59.650000000000000000 59.650000000000000000 +72 6671 13 13.0 42.82 42.82 42.820000000000000000 42.820000000000000000 +72 9860 26 26.0 69.92 69.92 69.920000000000000000 69.920000000000000000 +72 10427 66 66.0 55.31 55.31 55.310000000000000000 55.310000000000000000 +72 10753 16 16.0 32.01 32.01 32.010000000000000000 32.010000000000000000 +72 11741 62 62.0 79.25 79.25 79.250000000000000000 79.250000000000000000 +72 12788 29 29.0 34.57 34.57 34.570000000000000000 34.570000000000000000 +72 12901 57 57.0 1.64 1.64 1.640000000000000000 1.640000000000000000 +72 13085 94 94.0 85.13 85.13 85.130000000000000000 85.130000000000000000 +72 13423 62 62.0 34.39 34.39 34.390000000000000000 34.390000000000000000 +72 13904 37 37.0 40.39 40.39 40.390000000000000000 40.390000000000000000 +72 15587 87 87.0 19.04 19.04 19.040000000000000000 19.040000000000000000 +72 16765 56 56.0 89.44 89.44 89.440000000000000000 89.440000000000000000 +73 247 53 53.0 5.22 5.22 5.220000000000000000 5.220000000000000000 +73 1063 37 37.0 34.93 34.93 34.930000000000000000 34.930000000000000000 +73 3205 82 82.0 44.64 44.64 44.640000000000000000 44.640000000000000000 +73 4946 54 54.0 71.08 71.08 71.080000000000000000 71.080000000000000000 +73 6862 58 58.0 86.48 86.48 86.480000000000000000 86.480000000000000000 +73 10051 49 49.0 97.28 97.28 97.280000000000000000 97.280000000000000000 +73 12502 75 75.0 21.63 21.63 21.630000000000000000 21.630000000000000000 +73 15109 38 38.0 53.9 53.9 53.900000000000000000 53.900000000000000000 +73 16519 97 97.0 82.11 82.11 82.110000000000000000 82.110000000000000000 +73 16585 38 38.0 69.27 69.27 69.270000000000000000 69.270000000000000000 +73 17269 40 40.0 NULL NULL NULL NULL +74 326 29 29.0 76.73 76.73 76.730000000000000000 76.730000000000000000 +74 3104 78 78.0 52.23 52.23 52.230000000000000000 52.230000000000000000 +74 3175 23 23.0 50.69 50.69 50.690000000000000000 50.690000000000000000 +74 3278 NULL NULL NULL NULL NULL NULL +74 3542 96 96.0 93.18 93.18 93.180000000000000000 93.180000000000000000 +74 3754 26 26.0 89.35 89.35 89.350000000000000000 89.350000000000000000 +74 5492 54 54.0 31.24 31.24 31.240000000000000000 31.240000000000000000 +74 7694 17 17.0 36.61 36.61 36.610000000000000000 36.610000000000000000 +74 8653 12 12.0 4.33 4.33 4.330000000000000000 4.330000000000000000 +74 9620 95 95.0 56.35 56.35 56.350000000000000000 56.350000000000000000 +74 10069 99 99.0 99.98 99.98 99.980000000000000000 99.980000000000000000 +74 13208 87 87.0 82.61 82.61 82.610000000000000000 82.610000000000000000 +74 16694 72 72.0 36.04 36.04 36.040000000000000000 36.040000000000000000 +75 607 20 20.0 88.61 88.61 88.610000000000000000 88.610000000000000000 +75 2948 25 25.0 7.48 7.48 7.480000000000000000 7.480000000000000000 +75 4625 73 73.0 76.04 76.04 
76.040000000000000000 76.040000000000000000 +75 6938 89 89.0 20.73 20.73 20.730000000000000000 20.730000000000000000 +75 6953 71 71.0 33.34 33.34 33.340000000000000000 33.340000000000000000 +75 8726 6 6.0 25.87 25.87 25.870000000000000000 25.870000000000000000 +75 9905 54 54.0 63.01 63.01 63.010000000000000000 63.010000000000000000 +75 10217 85 85.0 83.25 83.25 83.250000000000000000 83.250000000000000000 +75 11039 70 70.0 87.84 87.84 87.840000000000000000 87.840000000000000000 +75 14186 63 63.0 82.77 82.77 82.770000000000000000 82.770000000000000000 +75 16796 93 93.0 5.19 5.19 5.190000000000000000 5.190000000000000000 +76 257 5 5.0 8.47 8.47 8.470000000000000000 8.470000000000000000 +76 465 2 2.0 95.45 95.45 95.450000000000000000 95.450000000000000000 +76 1107 16 16.0 NULL NULL NULL NULL +76 1503 97 97.0 30.22 30.22 30.220000000000000000 30.220000000000000000 +76 2265 98 98.0 89.7 89.7 89.700000000000000000 89.700000000000000000 +76 2869 32 32.0 NULL NULL NULL NULL +76 3363 25 25.0 89.9 89.9 89.900000000000000000 89.900000000000000000 +76 4237 48 48.0 60.58 60.58 60.580000000000000000 60.580000000000000000 +76 4567 40 40.0 2.19 2.19 2.190000000000000000 2.190000000000000000 +76 5529 78 78.0 49.64 49.64 49.640000000000000000 49.640000000000000000 +76 6381 50 50.0 34.93 34.93 34.930000000000000000 34.930000000000000000 +76 7591 27 27.0 61.86 61.86 61.860000000000000000 61.860000000000000000 +76 8925 6 6.0 80.04 80.04 80.040000000000000000 80.040000000000000000 +76 10331 3 3.0 29.09 29.09 29.090000000000000000 29.090000000000000000 +76 16463 53 53.0 86.06 86.06 86.060000000000000000 86.060000000000000000 +77 992 62 62.0 21.65 21.65 21.650000000000000000 21.650000000000000000 +77 1399 34 34.0 96.21 96.21 96.210000000000000000 96.210000000000000000 +77 2713 85 85.0 85.72 85.72 85.720000000000000000 85.720000000000000000 +77 3868 89 89.0 3.72 3.72 3.720000000000000000 3.720000000000000000 +77 6289 30 30.0 26.16 26.16 26.160000000000000000 26.160000000000000000 +77 7339 88 88.0 31.13 31.13 31.130000000000000000 31.130000000000000000 +77 7448 95 95.0 29.07 29.07 29.070000000000000000 29.070000000000000000 +77 7486 49 49.0 NULL NULL NULL NULL +77 8686 38 38.0 45.3 45.3 45.300000000000000000 45.300000000000000000 +77 9220 90 90.0 87.41 87.41 87.410000000000000000 87.410000000000000000 +77 11918 36 36.0 25.95 25.95 25.950000000000000000 25.950000000000000000 +77 12439 95 95.0 74.32 74.32 74.320000000000000000 74.320000000000000000 +77 13456 48 48.0 85.61 85.61 85.610000000000000000 85.610000000000000000 +77 14815 18 18.0 69.28 69.28 69.280000000000000000 69.280000000000000000 +77 16687 16 16.0 67.63 67.63 67.630000000000000000 67.630000000000000000 +78 901 3 3.0 54.5 54.5 54.500000000000000000 54.500000000000000000 +78 3304 50 50.0 61.49 61.49 61.490000000000000000 61.490000000000000000 +78 3856 27 27.0 60.1 60.1 60.100000000000000000 60.100000000000000000 +78 5965 78 78.0 7.47 7.47 7.470000000000000000 7.470000000000000000 +78 6044 59 59.0 15.94 15.94 15.940000000000000000 15.940000000000000000 +78 6110 43 43.0 28.45 28.45 28.450000000000000000 28.450000000000000000 +78 6500 76 76.0 38.42 38.42 38.420000000000000000 38.420000000000000000 +78 7576 87 87.0 60.62 60.62 60.620000000000000000 60.620000000000000000 +78 8611 79 79.0 95.42 95.42 95.420000000000000000 95.420000000000000000 +78 10507 6 6.0 50.83 50.83 50.830000000000000000 50.830000000000000000 +78 11209 7 7.0 88.17 88.17 88.170000000000000000 88.170000000000000000 +78 12706 19 19.0 81.1 81.1 81.100000000000000000 81.100000000000000000 +78 
14996 39 39.0 36.47 36.47 36.470000000000000000 36.470000000000000000 +79 247 NULL NULL NULL NULL NULL NULL +79 1063 85 85.0 77.51 77.51 77.510000000000000000 77.510000000000000000 +79 3205 48 48.0 21.34 21.34 21.340000000000000000 21.340000000000000000 +79 4947 35 35.0 99.77 99.77 99.770000000000000000 99.770000000000000000 +79 6864 1 1.0 86.12 86.12 86.120000000000000000 86.120000000000000000 +79 10051 10 10.0 94.53 94.53 94.530000000000000000 94.530000000000000000 +79 10524 36 36.0 21.73 21.73 21.730000000000000000 21.730000000000000000 +79 12504 81 81.0 14.87 14.87 14.870000000000000000 14.870000000000000000 +79 14322 41 41.0 58.88 58.88 58.880000000000000000 58.880000000000000000 +79 15109 NULL NULL 45.07 45.07 45.070000000000000000 45.070000000000000000 +79 15498 3 3.0 94.64 94.64 94.640000000000000000 94.640000000000000000 +79 15888 58 58.0 99.75 99.75 99.750000000000000000 99.750000000000000000 +79 16519 9 9.0 69.84 69.84 69.840000000000000000 69.840000000000000000 +79 16585 93 93.0 70.7 70.7 70.700000000000000000 70.700000000000000000 +79 17269 81 81.0 5.88 5.88 5.880000000000000000 5.880000000000000000 +80 998 93 93.0 69.32 69.32 69.320000000000000000 69.320000000000000000 +80 1519 25 25.0 66.36 66.36 66.360000000000000000 66.360000000000000000 +80 1573 40 40.0 43.33 43.33 43.330000000000000000 43.330000000000000000 +80 4040 66 66.0 15.01 15.01 15.010000000000000000 15.010000000000000000 +80 4513 NULL NULL 76.02 76.02 76.020000000000000000 76.020000000000000000 +80 4622 1 1.0 60.1 60.1 60.100000000000000000 60.100000000000000000 +80 7231 49 49.0 76.07 76.07 76.070000000000000000 76.070000000000000000 +80 7610 37 37.0 24.62 24.62 24.620000000000000000 24.620000000000000000 +80 10393 5 5.0 71.37 71.37 71.370000000000000000 71.370000000000000000 +80 12968 NULL NULL NULL NULL NULL NULL +80 13717 91 91.0 60.42 60.42 60.420000000000000000 60.420000000000000000 +80 13975 13 13.0 83.81 83.81 83.810000000000000000 83.810000000000000000 +80 16363 84 84.0 84.8 84.8 84.800000000000000000 84.800000000000000000 +80 16886 77 77.0 89.22 89.22 89.220000000000000000 89.220000000000000000 +80 17308 29 29.0 94.38 94.38 94.380000000000000000 94.380000000000000000 +80 17755 94 94.0 56.04 56.04 56.040000000000000000 56.040000000000000000 +81 4486 31 31.0 63.84 63.84 63.840000000000000000 63.840000000000000000 +81 5078 75 75.0 33.72 33.72 33.720000000000000000 33.720000000000000000 +81 5216 64 64.0 4.59 4.59 4.590000000000000000 4.590000000000000000 +81 5656 24 24.0 40.61 40.61 40.610000000000000000 40.610000000000000000 +81 7166 7 7.0 22.87 22.87 22.870000000000000000 22.870000000000000000 +81 7663 79 79.0 52.07 52.07 52.070000000000000000 52.070000000000000000 +81 8918 37 37.0 86.54 86.54 86.540000000000000000 86.540000000000000000 +81 9319 36 36.0 91.74 91.74 91.740000000000000000 91.740000000000000000 +81 11107 36 36.0 47.86 47.86 47.860000000000000000 47.860000000000000000 +81 11368 26 26.0 NULL NULL NULL NULL +81 13339 6 6.0 4.63 4.63 4.630000000000000000 4.630000000000000000 +81 15793 8 8.0 5.61 5.61 5.610000000000000000 5.610000000000000000 +82 2572 53 53.0 55.41 55.41 55.410000000000000000 55.410000000000000000 +82 7862 75 75.0 21.65 21.65 21.650000000000000000 21.650000000000000000 +82 13138 59 59.0 31.81 31.81 31.810000000000000000 31.810000000000000000 +82 14998 49 49.0 52.59 52.59 52.590000000000000000 52.590000000000000000 +82 17041 18 18.0 4.71 4.71 4.710000000000000000 4.710000000000000000 diff --git ql/src/test/results/clientpositive/vector_grouping_sets.q.out 
ql/src/test/results/clientpositive/vector_grouping_sets.q.out
index 3d35fbf..8a8d1ef 100644
--- ql/src/test/results/clientpositive/vector_grouping_sets.q.out
+++ ql/src/test/results/clientpositive/vector_grouping_sets.q.out
@@ -162,9 +162,11 @@ STAGE PLANS:
             Group By Operator
               Group By Vectorization:
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: []
               keys: s_store_id (type: string), 0 (type: int)
               mode: hash
@@ -196,8 +198,10 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
@@ -276,9 +280,11 @@ STAGE PLANS:
             Group By Operator
              Group By Vectorization:
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   keyExpressions: col 1, ConstantVectorExpression(val 0) -> 29:long
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: []
               keys: _col0 (type: string), 0 (type: int)
               mode: hash
@@ -310,8 +316,10 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: string), KEY._col1 (type: int)
           mode: mergepartial
diff --git ql/src/test/results/clientpositive/vector_include_no_sel.q.out ql/src/test/results/clientpositive/vector_include_no_sel.q.out
index 8c8ef80..7f97f54 100644
--- ql/src/test/results/clientpositive/vector_include_no_sel.q.out
+++ ql/src/test/results/clientpositive/vector_include_no_sel.q.out
@@ -239,8 +239,10 @@ STAGE PLANS:
               Group By Vectorization:
                   aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 3:long) -> bigint
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: [0]
               mode: hash
               outputColumnNames: _col0
@@ -273,8 +275,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: count(VALUE._col0)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0
diff --git ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
index 82bef24..0ff11df 100644
--- ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
+++ ql/src/test/results/clientpositive/vector_mapjoin_reduce.q.out
@@ -40,9 +40,11 @@ STAGE PLANS:
             Group By Operator
               Group By Vectorization:
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   keyExpressions: col 1
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: []
               keys: l_partkey (type: int)
               mode: hash
@@ -74,8 +76,10 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: int)
           mode: mergepartial
@@ -126,8 +130,10 @@ STAGE PLANS:
           Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             Group By Vectorization:
+                groupByMode: HASH
                 vectorOutput: false
                 native: false
+                vectorProcessingMode: NONE
                 projectedOutputColumns: null
             keys: _col0 (type: int)
             mode: hash
@@ -273,9 +279,11 @@ STAGE PLANS:
             Group By Operator
               Group By Vectorization:
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   keyExpressions: col 1
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: []
               keys: l_partkey (type: int)
               mode: hash
@@ -307,8 +315,10 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: int)
           mode: mergepartial
@@ -359,8 +369,10 @@ STAGE PLANS:
           Statistics: Num rows: 25 Data size: 2999 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
             Group By Vectorization:
+                groupByMode: HASH
                 vectorOutput: false
                 native: false
+                vectorProcessingMode: NONE
                 projectedOutputColumns: null
             keys: _col0 (type: int), _col1 (type: int)
             mode: hash
diff --git ql/src/test/results/clientpositive/vector_null_projection.q.out ql/src/test/results/clientpositive/vector_null_projection.q.out
index 94aea2f..bf3984f 100644
--- ql/src/test/results/clientpositive/vector_null_projection.q.out
+++ ql/src/test/results/clientpositive/vector_null_projection.q.out
@@ -64,7 +64,7 @@ STAGE PLANS:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
+          notVectorizedReason: Select expression for SELECT operator: Vectorizing data type void not supported when mode = PROJECTION
           vectorized: false
 
   Stage: Stage-0
@@ -111,8 +111,10 @@ STAGE PLANS:
           Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             Group By Vectorization:
+                groupByMode: HASH
                 vectorOutput: false
                 native: false
+                vectorProcessingMode: NONE
                 projectedOutputColumns: null
             keys: null (type: void)
             mode: hash
@@ -134,8 +136,10 @@ STAGE PLANS:
           Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             Group By Vectorization:
+                groupByMode: HASH
                 vectorOutput: false
                 native: false
+                vectorProcessingMode: NONE
                 projectedOutputColumns: null
             keys: null (type: void)
             mode: hash
@@ -156,8 +160,10 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: void)
           mode: mergepartial
diff --git ql/src/test/results/clientpositive/vector_nvl.q.out ql/src/test/results/clientpositive/vector_nvl.q.out
index 08cc168..f8de133 100644
--- ql/src/test/results/clientpositive/vector_nvl.q.out
+++ ql/src/test/results/clientpositive/vector_nvl.q.out
@@ -317,7 +317,7 @@ STAGE PLANS:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Select expression for SELECT operator: Data type void of Const void null not supported
+          notVectorizedReason: Select expression for SELECT operator: Vectorizing data type void not supported when mode = PROJECTION
           vectorized: false
 
   Stage: Stage-0
diff --git ql/src/test/results/clientpositive/vector_orderby_5.q.out ql/src/test/results/clientpositive/vector_orderby_5.q.out
index b85eb75..9a72950 100644
--- ql/src/test/results/clientpositive/vector_orderby_5.q.out
+++ ql/src/test/results/clientpositive/vector_orderby_5.q.out
@@ -139,9 +139,11 @@ STAGE PLANS:
               Group By Vectorization:
                   aggregators: VectorUDAFMaxLong(col 3) -> bigint
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   keyExpressions: col 7
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: [0]
               keys: bo (type: boolean)
               mode: hash
@@ -175,8 +177,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: max(VALUE._col0)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: boolean)
           mode: mergepartial
diff --git ql/src/test/results/clientpositive/vector_outer_join1.q.out ql/src/test/results/clientpositive/vector_outer_join1.q.out
index 7a92bef..70bce01 100644
--- ql/src/test/results/clientpositive/vector_outer_join1.q.out
+++ ql/src/test/results/clientpositive/vector_outer_join1.q.out
@@ -688,8 +688,10 @@ STAGE PLANS:
               Group By Vectorization:
                   aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: [0, 1]
               mode: hash
               outputColumnNames: _col0, _col1
@@ -727,8 +729,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: count(VALUE._col0), sum(VALUE._col1)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/vector_outer_join2.q.out ql/src/test/results/clientpositive/vector_outer_join2.q.out
index 6b51489..2265cb8 100644
--- ql/src/test/results/clientpositive/vector_outer_join2.q.out
+++ ql/src/test/results/clientpositive/vector_outer_join2.q.out
@@ -335,8 +335,10 @@ STAGE PLANS:
               Group By Vectorization:
                   aggregators: VectorUDAFCountStar(*) -> bigint, VectorUDAFSumLong(col 0) -> bigint
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   native: false
+                  vectorProcessingMode: HASH
                   projectedOutputColumns: [0, 1]
               mode: hash
               outputColumnNames: _col0, _col1
@@ -374,8 +376,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: count(VALUE._col0), sum(VALUE._col1)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           mode: mergepartial
           outputColumnNames: _col0, _col1
diff --git ql/src/test/results/clientpositive/vector_outer_join3.q.out ql/src/test/results/clientpositive/vector_outer_join3.q.out
index d299be4..e4e4825 100644
--- ql/src/test/results/clientpositive/vector_outer_join3.q.out
+++ ql/src/test/results/clientpositive/vector_outer_join3.q.out
@@ -242,7 +242,7 @@ left outer join small_alltypesorc_a hd
   on hd.cstring1 = c.cstring1
 ) t1
 POSTHOOK: type: QUERY
-{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats:
NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS 
false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan 
Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","vectorOutput:":"true","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 
6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"groupByMode:":"MERGEPARTIAL","vectorOutput:":"false","native:":"false","vectorProcessingMode:":"NONE","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -282,7 +282,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: 
string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine 
mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS 
false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","vectorOutput:":"true","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"groupByMode:":"MERGEPARTIAL","vectorOutput:":"false","native:":"false","vectorProcessingMode:":"NONE","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output 
format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd @@ -322,7 +322,7 @@ left outer join small_alltypesorc_a hd on hd.cstring1 = c.cstring1 and hd.cint = c.cint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic 
stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN 
VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 4400 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 22 Data size: 4840 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes 
IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 24 Data size: 5324 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","vectorOutput:":"true","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"groupByMode:":"MERGEPARTIAL","vectorOutput:":"false","native:":"false","vectorProcessingMode:":"NONE","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a c left outer join small_alltypesorc_a cd diff --git ql/src/test/results/clientpositive/vector_outer_join4.q.out ql/src/test/results/clientpositive/vector_outer_join4.q.out index 136e386..125ec07 100644 --- ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -780,7 +780,7 
@@ left outer join small_alltypesorc_b hd on hd.ctinyint = c.ctinyint ) t1 POSTHOOK: type: QUERY -{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE 
Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","vectorOutput:":"true","native:":"false","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"vectorOutput:":"false","native:":"false","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} +{"PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_2","children":{"Select Operator":{"expressions:":"cint (type: int)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic 
stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_3","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_26"}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"TS_4","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_5","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_24"}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","TableScan Vectorization:":{"native:":"true","projectedOutputColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumns:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 6680 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"SEL_28","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 33 Data size: 7348 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_29","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 36 Data size: 8082 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"MAPJOIN_30","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","vectorOutput:":"true","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumns:":"[0]"},"mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_31","children":{"Reduce Output Operator":{"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS 
true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_32"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"groupByVectorOutput:":"true","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"Group By Vectorization:":{"groupByMode:":"MERGEPARTIAL","vectorOutput:":"false","native:":"false","vectorProcessingMode:":"NONE","projectedOutputColumns:":"null"},"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","OperatorId:":"GBY_15","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_17"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_33"}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out index 724ef45..f90100d 100644 --- ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out +++ ql/src/test/results/clientpositive/vector_reduce_groupby_decimal.q.out @@ -59,9 +59,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinDecimal(col 2) -> decimal(20,10) className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 1, col 2, col 3 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: cint (type: int), cdouble (type: double), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)) mode: hash @@ -96,8 +98,10 @@ STAGE PLANS: Group By Operator aggregations: min(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: int), KEY._col1 (type: double), KEY._col2 (type: decimal(20,10)), KEY._col3 (type: decimal(23,14)) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_string_concat.q.out ql/src/test/results/clientpositive/vector_string_concat.q.out index 00f9b38..9f6fe7d 100644 --- ql/src/test/results/clientpositive/vector_string_concat.q.out +++ 
ql/src/test/results/clientpositive/vector_string_concat.q.out @@ -346,9 +346,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 19 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: _col0 (type: string) mode: hash @@ -381,8 +383,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/vector_tablesample_rows.q.out ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index c96ea00..fd9908f 100644 --- ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -1,7 +1,7 @@ -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select 'key1', 'value1' from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY Explain @@ -52,6 +52,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: string, string Stage: Stage-0 Fetch Operator @@ -77,11 +83,11 @@ POSTHOOK: query: create table decimal_2 (t decimal(18,9)) stored as orc POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@decimal_2 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail insert overwrite table decimal_2 select cast('17.29' as decimal(4,2)) from alltypesorc tablesample (1 rows) POSTHOOK: type: QUERY @@ -140,6 +146,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: decimal(18,9) Stage: Stage-7 Conditional Operator @@ -213,10 +225,10 @@ POSTHOOK: query: drop table decimal_2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@decimal_2 POSTHOOK: Output: default@decimal_2 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select count(1) from (select * from (Select 1 a) x order by x.a) y POSTHOOK: type: QUERY Explain @@ -256,8 +268,10 @@ STAGE PLANS: Group By Operator aggregations: count(1) Group By Vectorization: + 
groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0 @@ -294,6 +308,11 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: _col0:bigint + partitionColumnCount: 0 Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -302,8 +321,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 @@ -332,10 +353,10 @@ POSTHOOK: Input: _dummy_database@_dummy_table #### A masked pattern was here #### _c0 1 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail create temporary table dual as select 1 PREHOOK: type: CREATETABLE_AS_SELECT -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail create temporary table dual as select 1 POSTHOOK: type: CREATETABLE_AS_SELECT Explain diff --git ql/src/test/results/clientpositive/vector_when_case_null.q.out ql/src/test/results/clientpositive/vector_when_case_null.q.out index 5ae4b99..e002336 100644 --- ql/src/test/results/clientpositive/vector_when_case_null.q.out +++ ql/src/test/results/clientpositive/vector_when_case_null.q.out @@ -52,9 +52,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 5) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: _col0 (type: string) mode: hash @@ -88,8 +90,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: string) mode: mergepartial diff --git ql/src/test/results/clientpositive/vectorization_1.q.out ql/src/test/results/clientpositive/vectorization_1.q.out index e0a4344..9c2ce2a 100644 --- ql/src/test/results/clientpositive/vectorization_1.q.out +++ ql/src/test/results/clientpositive/vectorization_1.q.out @@ -1,3 +1,149 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT VAR_POP(ctinyint), + (VAR_POP(ctinyint) / -26.28), + SUM(cfloat), + (-1.389 + SUM(cfloat)), + (SUM(cfloat) * (-1.389 + SUM(cfloat))), + MAX(ctinyint), + (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))), + MAX(cint), + (MAX(cint) * 79.553), + VAR_SAMP(cdouble), + (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))), + COUNT(cint), + (-563 % MAX(cint)) +FROM alltypesorc +WHERE (((cdouble > ctinyint) + AND (cboolean2 > 0)) + OR ((cbigint < ctinyint) + OR ((cint > cbigint) + OR (cboolean1 < 0)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT VAR_POP(ctinyint), + (VAR_POP(ctinyint) / -26.28), + SUM(cfloat), + (-1.389 + SUM(cfloat)), + (SUM(cfloat) * (-1.389 + SUM(cfloat))), + MAX(ctinyint), + (-((SUM(cfloat) * (-1.389 + SUM(cfloat))))), + MAX(cint), + (MAX(cint) * 79.553), + VAR_SAMP(cdouble), + (10.175 % (-((SUM(cfloat) * (-1.389 + SUM(cfloat)))))), + COUNT(cint), + (-563 % MAX(cint)) +FROM alltypesorc +WHERE (((cdouble > ctinyint) + AND (cboolean2 > 0)) + OR ((cbigint < ctinyint) + OR ((cint > cbigint) + OR (cboolean1 < 0)))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + 
enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterLongColGreaterLongScalar(col 11, val 0) -> boolean) -> boolean, FilterLongColLessLongColumn(col 3, col 0)(children: col 0) -> boolean, FilterLongColGreaterLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean) -> boolean + predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) + outputColumnNames: ctinyint, cint, cfloat, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 4, 5] + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + Group By Vectorization: + aggregators: VectorUDAFVarPopLong(col 0) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFSumDouble(col 4) -> double, VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFMaxLong(col 2) -> int, VectorUDAFVarSampDouble(col 5) -> struct<count:bigint,sum:double,variance:double>, VectorUDAFCount(col 2) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4, 5] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct<count:bigint,sum:double,variance:double>), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: bigint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 2, 3, 4, 5, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string,
ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (_col0 / -26.28) (type: double), _col1 (type: double), (-1.389 + _col1) (type: double), (_col1 * (-1.389 + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389 + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175 % (- (_col1 * (-1.389 + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT VAR_POP(ctinyint), (VAR_POP(ctinyint) / -26.28), SUM(cfloat), diff --git ql/src/test/results/clientpositive/vectorization_10.q.out ql/src/test/results/clientpositive/vectorization_10.q.out index 9dad4c4..d2d9bf6 100644 --- ql/src/test/results/clientpositive/vectorization_10.q.out +++ ql/src/test/results/clientpositive/vectorization_10.q.out @@ -1,3 +1,117 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cdouble, + ctimestamp1, + ctinyint, + cboolean1, + cstring1, + (-(cdouble)), + (cdouble + csmallint), + ((cdouble + csmallint) % 33), + (-(cdouble)), + (ctinyint % cdouble), + (ctinyint % csmallint), + (-(cdouble)), + (cbigint * (ctinyint % csmallint)), + (9763215.5639 - (cdouble + csmallint)), + (-((-(cdouble)))) +FROM alltypesorc +WHERE (((cstring2 <= '10') + OR ((ctinyint > cdouble) + AND (-5638.15 >= ctinyint))) + OR ((cdouble > 6981) + AND ((csmallint = 9763215.5639) + OR (cstring1 LIKE '%a')))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cdouble, + ctimestamp1, + ctinyint, + cboolean1, + cstring1, + (-(cdouble)), + (cdouble + csmallint), + ((cdouble + csmallint) % 33), + (-(cdouble)), + (ctinyint % cdouble), + (ctinyint % csmallint), + (-(cdouble)), + (cbigint * (ctinyint % csmallint)), + (9763215.5639 - (cdouble + csmallint)), + (-((-(cdouble)))) +FROM alltypesorc +WHERE (((cstring2 <= '10') + OR ((ctinyint > cdouble) + AND (-5638.15 >= ctinyint))) + OR ((cdouble > 6981) + AND ((csmallint = 9763215.5639) + OR (cstring1 LIKE '%a')))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS 
true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) + Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25] + selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double + Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 3, 5, 6, 7, 8, 10]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: double, decimal(6,2), decimal(11,4), double, double, double, double, double, bigint, bigint, bigint, double, double, double
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cdouble,
        ctimestamp1,
        ctinyint,
diff --git ql/src/test/results/clientpositive/vectorization_11.q.out ql/src/test/results/clientpositive/vectorization_11.q.out
index dff58da..bc03170 100644
--- ql/src/test/results/clientpositive/vectorization_11.q.out
+++ ql/src/test/results/clientpositive/vectorization_11.q.out
@@ -1,3 +1,99 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cstring1,
+       cboolean1,
+       cdouble,
+       ctimestamp1,
+       (-3728 * csmallint),
+       (cdouble - 9763215.5639),
+       (-(cdouble)),
+       ((-(cdouble)) + 6981),
+       (cdouble * -5638.15)
+FROM alltypesorc
+WHERE ((cstring2 = cstring1)
+       OR ((ctimestamp1 IS NULL)
+           AND (cstring1 LIKE '%a')))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cstring1,
+       cboolean1,
+       cdouble,
+       ctimestamp1,
+       (-3728 * csmallint),
+       (cdouble - 9763215.5639),
+       (-(cdouble)),
+       ((-(cdouble)) + 6981),
+       (cdouble * -5638.15)
+FROM alltypesorc
+WHERE ((cstring2 = cstring1)
+       OR ((ctimestamp1 IS NULL)
+           AND (cstring1 LIKE '%a')))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7, col 6) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean
+              predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
+              Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0) (type: double), (cdouble * -5638.15) (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [6, 10, 5, 8, 12, 13, 14, 16, 15]
+                    selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1)(children: col 1) -> 12:long, DoubleColSubtractDoubleScalar(col 5, val 9763215.5639) -> 13:double, DoubleColUnaryMinus(col 5) -> 14:double, DoubleColAddDoubleScalar(col 15, val 6981.0)(children: DoubleColUnaryMinus(col 5) -> 15:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5, val -5638.15) -> 15:double
+                Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [1, 5, 6, 7, 8, 10]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: bigint, double, double, double, double
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cstring1,
        cboolean1,
        cdouble,
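Note on how golden output like the vectorization_10/vectorization_11 sections above is produced: each q file prepends an EXPLAIN VECTORIZATION DETAIL of its query, run with vectorized execution switched on (the plans record this as enabledConditionsMet: [hive.vectorized.execution.enabled IS true]). The following session sketch is illustrative only — it is not itself part of any golden file; the WHERE clause merely mirrors the vectorization_11 query:

    SET hive.vectorized.execution.enabled=true;

    EXPLAIN VECTORIZATION DETAIL
    SELECT cstring1, cboolean1, cdouble, ctimestamp1
    FROM alltypesorc
    WHERE (cstring2 = cstring1)
       OR (ctimestamp1 IS NULL AND cstring1 LIKE '%a');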
diff --git ql/src/test/results/clientpositive/vectorization_12.q.out ql/src/test/results/clientpositive/vectorization_12.q.out
index 6a7f69c..df3f047 100644
--- ql/src/test/results/clientpositive/vectorization_12.q.out
+++ ql/src/test/results/clientpositive/vectorization_12.q.out
@@ -1,3 +1,223 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cbigint,
+       cboolean1,
+       cstring1,
+       ctimestamp1,
+       cdouble,
+       (-6432 * cdouble),
+       (-(cbigint)),
+       COUNT(cbigint),
+       (cbigint * COUNT(cbigint)),
+       STDDEV_SAMP(cbigint),
+       ((-6432 * cdouble) / -6432),
+       (-(((-6432 * cdouble) / -6432))),
+       AVG(cdouble),
+       (-((-6432 * cdouble))),
+       (-5638.15 + cbigint),
+       SUM(cbigint),
+       (AVG(cdouble) / (-6432 * cdouble)),
+       AVG(cdouble),
+       (-((-(((-6432 * cdouble) / -6432))))),
+       (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))),
+       STDDEV_POP(cdouble)
+FROM alltypesorc
+WHERE (((ctimestamp1 IS NULL)
+        AND ((cboolean1 >= cboolean2)
+             OR (ctinyint != csmallint)))
+       AND ((cstring1 LIKE '%a')
+            OR ((cboolean2 <= 1)
+                AND (cbigint >= csmallint))))
+GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT cbigint,
+       cboolean1,
+       cstring1,
+       ctimestamp1,
+       cdouble,
+       (-6432 * cdouble),
+       (-(cbigint)),
+       COUNT(cbigint),
+       (cbigint * COUNT(cbigint)),
+       STDDEV_SAMP(cbigint),
+       ((-6432 * cdouble) / -6432),
+       (-(((-6432 * cdouble) / -6432))),
+       AVG(cdouble),
+       (-((-6432 * cdouble))),
+       (-5638.15 + cbigint),
+       SUM(cbigint),
+       (AVG(cdouble) / (-6432 * cdouble)),
+       AVG(cdouble),
+       (-((-(((-6432 * cdouble) / -6432))))),
+       (((-6432 * cdouble) / -6432) + (-((-6432 * cdouble)))),
+       STDDEV_POP(cdouble)
+FROM alltypesorc
+WHERE (((ctimestamp1 IS NULL)
+        AND ((cboolean1 >= cboolean2)
+             OR (ctinyint != csmallint)))
+       AND ((cstring1 LIKE '%a')
+            OR ((cboolean2 <= 1)
+                AND (cbigint >= csmallint))))
+GROUP BY cbigint, cboolean1, cstring1, ctimestamp1, cdouble
+ORDER BY ctimestamp1, cdouble, cbigint, cstring1
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10, col 11) -> boolean, FilterLongColNotEqualLongColumn(col 0, col 1)(children: col 0) -> boolean) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11, val 1) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 1)(children: col 1) -> boolean) -> boolean) -> boolean) -> boolean
+              predicate: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean)
+              Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean)
+                outputColumnNames: cbigint, cdouble, cstring1, cboolean1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [3, 5, 6, 10]
+                Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 3) -> bigint, VectorUDAFStdSampLong(col 3) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFSumLong(col 3) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      vectorOutput: true
+                      keyExpressions: col 5, col 3, col 6, col 10
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumns: [0, 1, 2, 3, 4]
+                  keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                  Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
+                    sort order: ++++
+                    Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 3, 5, 6, 8, 10, 11]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+          Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
+            Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+            Reduce Output Operator
+              key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
+              sort order: +++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+              Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 19
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]
+              dataColumns: _col0:bigint, _col1:boolean, _col2:string, _col3:double, _col4:double, _col5:bigint, _col6:bigint, _col7:bigint, _col8:double, _col9:double, _col10:double, _col11:double, _col12:double, _col13:decimal(22,2), _col14:bigint, _col15:double, _col17:double, _col18:double, _col19:double
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: string), null (type: timestamp), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(22,2)), VALUE._col11 (type: bigint), VALUE._col12 (type: double), VALUE._col8 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
+          Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT cbigint,
        cboolean1,
        cstring1,
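The vectorization_12 plan above shows the split between the two sides of an MR job: the map side runs vectorized (Execution mode: vectorized), while Reduce Vectorization reports enabled: false because the enableConditionsNotMet line requires hive.execution.engine to be tez or spark. A hedged sketch of the settings those condition lines point at (it assumes a Tez-capable driver, which the MR golden files above do not use):

    SET hive.execution.engine=tez;
    SET hive.vectorized.execution.reduce.enabled=true;

    EXPLAIN VECTORIZATION DETAIL
    SELECT cbigint, COUNT(cbigint), SUM(cbigint)
    FROM alltypesorc
    GROUP BY cbigint;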
diff --git ql/src/test/results/clientpositive/vectorization_13.q.out ql/src/test/results/clientpositive/vectorization_13.q.out
index 35c704e..5852972 100644
--- ql/src/test/results/clientpositive/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/vectorization_13.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -31,7 +31,7 @@ GROUP BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1
 ORDER BY cboolean1, ctinyint, ctimestamp1, cfloat, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16
 LIMIT 40
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cboolean1,
        ctinyint,
        ctimestamp1,
@@ -103,11 +103,12 @@ STAGE PLANS:
                   Group By Vectorization:
                       aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
                       className: VectorGroupByOperator
-                      vectorOutput: false
+                      groupByMode: HASH
+                      vectorOutput: true
                       keyExpressions: col 10, col 0, col 8, col 4, col 6
                       native: false
+                      vectorProcessingMode: HASH
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5]
-                      vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                   keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -116,17 +117,28 @@ STAGE PLANS:
                     key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                     sort order: +++++
                     Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                     Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 4, 5, 6, 8, 9, 10]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: double, decimal(11,4)
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -135,8 +147,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
           mode: mergepartial
@@ -179,6 +193,11 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 21
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+              dataColumns: _col0:boolean, _col1:tinyint, _col2:timestamp, _col3:float, _col4:string, _col5:tinyint, _col6:tinyint, _col7:tinyint, _col8:double, _col9:double, _col10:double, _col11:float, _col12:double, _col13:double, _col14:double, _col15:decimal(7,3), _col16:double, _col17:double, _col18:float, _col19:double, _col20:tinyint
+              partitionColumnCount: 0
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -418,11 +437,12 @@ STAGE PLANS:
                   Group By Vectorization:
                       aggregators: VectorUDAFMaxLong(col 0) -> tinyint, VectorUDAFSumDouble(col 4) -> double, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFMinLong(col 0) -> tinyint
                       className: VectorGroupByOperator
-                      vectorOutput: false
+                      groupByMode: HASH
+                      vectorOutput: true
                       keyExpressions: col 10, col 0, col 8, col 4, col 6
                       native: false
+                      vectorProcessingMode: HASH
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5]
-                      vectorOutputConditionsNotMet: Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopLong(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                   keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -431,13 +451,18 @@ STAGE PLANS:
                     key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                     sort order: +++++
                     Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                     Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
@@ -450,8 +475,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
               vectorOutput: false
               native: false
+              vectorProcessingMode: NONE
               projectedOutputColumns: null
           keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
           mode: mergepartial
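The vectorization_13 hunks above carry the behavioral core of this change: the removed vectorOutputConditionsNotMet lines show that a map-side HASH group-by previously fell back to row-mode output whenever an aggregate such as stddev_pop produced a STRUCT-typed partial result, while the new output reports vectorOutput: true plus an explicit vectorProcessingMode. An illustrative query of the same shape (a sketch only — any STRUCT-producing aggregate over grouped keys exercises this path):

    EXPLAIN VECTORIZATION DETAIL
    SELECT cboolean1, STDDEV_POP(ctinyint), MAX(cfloat)
    FROM alltypesorc
    GROUP BY cboolean1;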
diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out
index ec4f7cd..c6bd7cf 100644
--- ql/src/test/results/clientpositive/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/vectorization_14.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
        cfloat,
        cstring1,
@@ -31,7 +31,7 @@ WHERE (((ctinyint <= cbigint)
 GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
 ORDER BY cstring1, cfloat, cdouble, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ctimestamp1,
        cfloat,
        cstring1,
@@ -80,15 +80,36 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterTimestampColLessTimestampColumn(col 9, col 8) -> boolean) -> boolean, FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3, val -257) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean
               predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean)
               Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [8, 4, 6, 10, 5, 13]
+                    selectExpressions: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5) -> 12:double) -> 13:double
                 Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFStdSampDouble(col 13) -> struct, VectorUDAFMaxDouble(col 4) -> float, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFCount(col 4) -> bigint, VectorUDAFVarPopDouble(col 4) -> struct, VectorUDAFVarSampDouble(col 4) -> struct
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      vectorOutput: true
+                      keyExpressions: col 6, col 4, col 5, col 8, col 10
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                   keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -97,17 +118,28 @@ STAGE PLANS:
                     key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                     sort order: +++++
                     Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                     Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: double, double
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -115,6 +147,12 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
          keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -134,9 +172,17 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
            Reduce Output Operator
              key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
              sort order: ++++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
              Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
       Execution mode: vectorized
@@ -148,6 +194,11 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 22
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
+              dataColumns: _col0:timestamp, _col1:float, _col2:string, _col3:boolean, _col4:double, _col5:double, _col6:double, _col7:double, _col8:float, _col9:float, _col10:float, _col11:float, _col12:double, _col13:double, _col14:bigint, _col15:double, _col16:double, _col17:double, _col18:double, _col19:double, _col20:double, _col21:double
+              partitionColumnCount: 0
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorization_15.q.out ql/src/test/results/clientpositive/vectorization_15.q.out
index 05b8b14..8f0a879 100644
--- ql/src/test/results/clientpositive/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/vectorization_15.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cfloat,
        cboolean1,
        cdouble,
@@ -29,7 +29,7 @@ WHERE (((cstring2 LIKE '%ss%')
 GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cfloat,
        cboolean1,
        cdouble,
@@ -76,15 +76,35 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean, FilterStringColLikeStringScalar(col 6, pattern 10%) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -75) -> boolean, FilterLongColEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 5, val -3728.0) -> boolean) -> boolean) -> boolean
               predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean)
               Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
                 outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 2, 4, 5, 6, 8, 10]
                 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFStdSampDouble(col 4) -> struct, VectorUDAFMinDouble(col 5) -> double, VectorUDAFStdSampLong(col 0) -> struct, VectorUDAFVarPopLong(col 0) -> struct, VectorUDAFVarSampLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      vectorOutput: true
+                      keyExpressions: col 4, col 10, col 5, col 6, col 0, col 2, col 8
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
                   keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -93,24 +113,39 @@ STAGE PLANS:
                     key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
                     sort order: +++++++
                     Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                     Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Reduce Vectorization:
           enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+          enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
           keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -130,9 +165,17 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
            Reduce Output Operator
              key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
              sort order: +++++++
+              Reduce Sink Vectorization:
+                  className: VectorReduceSinkOperator
+                  native: false
+                  nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                  nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
              Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
              value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
       Execution mode: vectorized
@@ -144,10 +187,14 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 21
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
+              dataColumns: _col0:float, _col1:boolean, _col2:double, _col3:string, _col4:tinyint, _col5:int, _col6:timestamp, _col7:double, _col8:decimal(13,2), _col9:double, _col10:double, _col11:float, _col12:double, _col13:double, _col14:double, _col15:tinyint, _col16:double, _col17:float, _col18:int, _col19:decimal(13,2), _col20:double
+              partitionColumnCount: 0
       Reduce Vectorization:
           enabled: false
-          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
-          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+          enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Select Operator
           expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
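A detail worth noticing in the vectorization_15 hunks above: besides the usual MR-engine condition, hive.vectorized.execution.reduce.enabled moves from the enableConditionsMet list to enableConditionsNotMet, meaning that this q file now runs with reduce-side vectorization explicitly switched off. Toggling the flag is what moves a setting between the two lists; an illustrative sketch:

    -- with this set, any EXPLAIN VECTORIZATION DETAIL query reports the flag
    -- under enableConditionsNotMet in its Reduce Vectorization section
    SET hive.vectorized.execution.reduce.enabled=false;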
diff --git ql/src/test/results/clientpositive/vectorization_16.q.out ql/src/test/results/clientpositive/vectorization_16.q.out
index 2e3a34d..930b476 100644
--- ql/src/test/results/clientpositive/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/vectorization_16.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%')
         OR (cstring1 < 'a')))
 GROUP BY cstring1, cdouble, ctimestamp1
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cstring1,
        cdouble,
        ctimestamp1,
@@ -53,15 +53,35 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean
               predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
               Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                 outputColumnNames: cdouble, cstring1, ctimestamp1
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [5, 6, 8]
                 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFMinDouble(col 5) -> double
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      vectorOutput: true
+                      keyExpressions: col 5, col 6, col 8
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumns: [0, 1, 2]
                   keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -70,17 +90,27 @@ STAGE PLANS:
                     key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                     sort order: +++
                     Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                     Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [5, 6, 7, 8]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -88,6 +118,12 @@ STAGE PLANS:
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
           keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
diff --git ql/src/test/results/clientpositive/vectorization_17.q.out ql/src/test/results/clientpositive/vectorization_17.q.out
index 7590703..c018608 100644
--- ql/src/test/results/clientpositive/vectorization_17.q.out
+++ ql/src/test/results/clientpositive/vectorization_17.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: EXPLAIN VECTORIZATION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cfloat,
        cstring1,
        cint,
@@ -22,7 +22,7 @@ WHERE (((cbigint > -23)
         OR (cfloat = cdouble))))
 ORDER BY cbigint, cfloat
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT cfloat,
        cstring1,
        cint,
@@ -61,16 +61,33 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc
             Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean
               predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean)
               Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17]
+                    selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double
                 Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col5 (type: bigint), _col0 (type: float)
                   sort order: ++
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                   Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
       Execution mode: vectorized
@@ -82,6 +99,12 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 3, 4, 5, 6, 8]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: decimal(13,3), double, double, bigint, double, double, double, double, decimal(19,0), decimal(11,4), double
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
diff --git ql/src/test/results/clientpositive/vectorization_2.q.out ql/src/test/results/clientpositive/vectorization_2.q.out
index 709a75f..47ff8d4 100644
--- ql/src/test/results/clientpositive/vectorization_2.q.out
+++ ql/src/test/results/clientpositive/vectorization_2.q.out
@@ -1,3 +1,153 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(csmallint),
+       (AVG(csmallint) % -563),
+       (AVG(csmallint) + 762),
+       SUM(cfloat),
+       VAR_POP(cbigint),
+       (-(VAR_POP(cbigint))),
+       (SUM(cfloat) - AVG(csmallint)),
+       COUNT(*),
+       (-((SUM(cfloat) - AVG(csmallint)))),
+       (VAR_POP(cbigint) - 762),
+       MIN(ctinyint),
+       ((-(VAR_POP(cbigint))) + MIN(ctinyint)),
+       AVG(cdouble),
+       (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat))
+FROM alltypesorc
+WHERE (((ctimestamp1 < ctimestamp2)
+        AND ((cstring2 LIKE 'b%')
+             AND (cfloat <= -5638.15)))
+       OR ((cdouble < ctinyint)
+           AND ((-10669 != ctimestamp2)
+                OR (359 > cint))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(csmallint),
+       (AVG(csmallint) % -563),
+       (AVG(csmallint) + 762),
+       SUM(cfloat),
+       VAR_POP(cbigint),
+       (-(VAR_POP(cbigint))),
+       (SUM(cfloat) - AVG(csmallint)),
+       COUNT(*),
+       (-((SUM(cfloat) - AVG(csmallint)))),
+       (VAR_POP(cbigint) - 762),
+       MIN(ctinyint),
+       ((-(VAR_POP(cbigint))) + MIN(ctinyint)),
+       AVG(cdouble),
+       (((-(VAR_POP(cbigint))) + MIN(ctinyint)) - SUM(cfloat))
+FROM alltypesorc
+WHERE (((ctimestamp1 < ctimestamp2)
+        AND ((cstring2 LIKE 'b%')
+             AND (cfloat <= -5638.15)))
+       OR ((cdouble < ctinyint)
+           AND ((-10669 != ctimestamp2)
+                OR (359 > cint))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8, col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern b%) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarNotEqualDoubleColumn(val -10669.0, col 12)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterLongScalarGreaterLongColumn(val 359, col 2) -> boolean) -> boolean) -> boolean) -> boolean
+              predicate: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((-10669.0 <> UDFToDouble(ctimestamp2)) or (359 > cint)))) (type: boolean)
+              Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double)
+                outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 3, 4, 5]
+                Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFAvgLong(col 1) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFVarPopLong(col 3) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0) -> tinyint, VectorUDAFAvgDouble(col 5) -> struct
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      vectorOutput: true
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: double
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: double), (_col0 % -563.0) (type: double), (_col0 + 762.0) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+            Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: SELECT AVG(csmallint),
        (AVG(csmallint) % -563),
        (AVG(csmallint) + 762),
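The context line opening the next hunk, "WARNING: Comparing a bigint and a double may result in a loss of precision.", is the planner warning for mixed bigint/double comparisons, triggered here by the (cbigint > cdouble) predicate in the vectorization_3 query below. A minimal reproduction sketch (illustrative only):

    SELECT COUNT(*) FROM alltypesorc WHERE cbigint > cdouble;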
diff --git ql/src/test/results/clientpositive/vectorization_3.q.out ql/src/test/results/clientpositive/vectorization_3.q.out
index 2398dee..a730ca6 100644
--- ql/src/test/results/clientpositive/vectorization_3.q.out
+++ ql/src/test/results/clientpositive/vectorization_3.q.out
@@ -1,4 +1,159 @@
 WARNING: Comparing a bigint and a double may result in a loss of precision.
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT STDDEV_SAMP(csmallint),
+       (STDDEV_SAMP(csmallint) - 10.175),
+       STDDEV_POP(ctinyint),
+       (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)),
+       (-(STDDEV_POP(ctinyint))),
+       (STDDEV_SAMP(csmallint) % 79.553),
+       (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))),
+       STDDEV_SAMP(cfloat),
+       (-(STDDEV_SAMP(csmallint))),
+       SUM(cfloat),
+       ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)),
+       (-((STDDEV_SAMP(csmallint) - 10.175))),
+       AVG(cint),
+       (-3728 - STDDEV_SAMP(csmallint)),
+       STDDEV_POP(cint),
+       (AVG(cint) / STDDEV_SAMP(cfloat))
+FROM alltypesorc
+WHERE (((cint <= cfloat)
+        AND ((79.553 != cbigint)
+             AND (ctimestamp2 = -29071)))
+       OR ((cbigint > cdouble)
+           AND ((79.553 <= csmallint)
+                AND (ctimestamp1 > ctimestamp2))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT STDDEV_SAMP(csmallint),
+       (STDDEV_SAMP(csmallint) - 10.175),
+       STDDEV_POP(ctinyint),
+       (STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)),
+       (-(STDDEV_POP(ctinyint))),
+       (STDDEV_SAMP(csmallint) % 79.553),
+       (-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))),
+       STDDEV_SAMP(cfloat),
+       (-(STDDEV_SAMP(csmallint))),
+       SUM(cfloat),
+       ((-((STDDEV_SAMP(csmallint) * (STDDEV_SAMP(csmallint) - 10.175)))) / (STDDEV_SAMP(csmallint) - 10.175)),
+       (-((STDDEV_SAMP(csmallint) - 10.175))),
+       AVG(cint),
+       (-3728 - STDDEV_SAMP(csmallint)),
+       STDDEV_POP(cint),
+       (AVG(cint) / STDDEV_SAMP(cfloat))
+FROM alltypesorc
+WHERE (((cint <= cfloat)
+        AND ((79.553 != cbigint)
+             AND (ctimestamp2 = -29071)))
+       OR ((cbigint > cdouble)
+           AND ((79.553 <= csmallint)
+                AND (ctimestamp1 > ctimestamp2))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -29071.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 1) -> 14:decimal(8,3)) -> boolean, FilterTimestampColGreaterTimestampColumn(col 8, col 9) -> boolean) -> boolean) -> boolean
+              predicate: (((UDFToFloat(cint) <= cfloat) and (79.553 <> CAST( cbigint AS decimal(22,3))) and (UDFToDouble(ctimestamp2) = -29071.0)) or ((UDFToDouble(cbigint) > cdouble) and (79.553 <= CAST( csmallint AS decimal(8,3))) and (ctimestamp1 > ctimestamp2))) (type: boolean)
+              Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float)
+                outputColumnNames: ctinyint, csmallint, cint, cfloat
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0, 1, 2, 4]
+                Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint)
+                  Group By Vectorization:
+                      aggregators: VectorUDAFStdSampLong(col 1) -> struct, VectorUDAFStdPopLong(col 0) -> struct, VectorUDAFStdSampDouble(col 4) -> struct, VectorUDAFSumDouble(col 4) -> double, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct
+                      className: VectorGroupByOperator
+                      groupByMode: HASH
+                      vectorOutput: true
+                      native: false
+                      vectorProcessingMode: HASH
+                      projectedOutputColumns: [0, 1, 2, 3, 4, 5]
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Reduce Sink Vectorization:
+                        className: VectorReduceSinkOperator
+                        native: false
+                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                    Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 12
+              includeColumns: [0, 1, 2, 3, 4, 5, 8, 9]
+              dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
+              partitionColumnCount: 0
+              scratchColumnTypeNames: double, decimal(22,3), decimal(8,3)
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: double), (_col0 - 10.175) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175)) (type: double), (- _col1) (type: double), (_col0 % 79.553) (type: double), (- (_col0 * (_col0 - 10.175))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175))) / (_col0 - 10.175)) (type: double), (- (_col0 - 10.175)) (type: double), _col4 (type: double), (-3728.0 - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+            Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+WARNING: Comparing a bigint and a double may result in a loss of precision.
 PREHOOK: query: SELECT STDDEV_SAMP(csmallint),
        (STDDEV_SAMP(csmallint) - 10.175),
        STDDEV_POP(ctinyint),
diff --git ql/src/test/results/clientpositive/vectorization_4.q.out ql/src/test/results/clientpositive/vectorization_4.q.out
index 0d6829f..0199d7d 100644
--- ql/src/test/results/clientpositive/vectorization_4.q.out
+++ ql/src/test/results/clientpositive/vectorization_4.q.out
@@ -1,3 +1,152 @@
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT SUM(cint),
+       (SUM(cint) * -563),
+       (-3728 + SUM(cint)),
+       STDDEV_POP(cdouble),
+       (-(STDDEV_POP(cdouble))),
+       AVG(cdouble),
+       ((SUM(cint) * -563) % SUM(cint)),
+       (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)),
+       VAR_POP(cdouble),
+       (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))),
+       ((-3728 + SUM(cint)) - (SUM(cint) * -563)),
+       MIN(ctinyint),
+       MIN(ctinyint),
+       (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))))
+FROM alltypesorc
+WHERE (((csmallint >= cint)
+        OR ((-89010 >= ctinyint)
+            AND (cdouble > 79.553)))
+       OR ((-563 != cbigint)
+           AND ((ctinyint != cbigint)
+                OR (-3728 >= cdouble))))
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT SUM(cint),
+       (SUM(cint) * -563),
+       (-3728 + SUM(cint)),
+       STDDEV_POP(cdouble),
+       (-(STDDEV_POP(cdouble))),
+       AVG(cdouble),
+       ((SUM(cint) * -563) % SUM(cint)),
+       (((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)),
+       VAR_POP(cdouble),
+       (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))),
+       ((-3728 + SUM(cint)) - (SUM(cint) * -563)),
+       MIN(ctinyint),
+       MIN(ctinyint),
+       (MIN(ctinyint) * (-((((SUM(cint) * -563) % SUM(cint)) / AVG(cdouble)))))
+FROM alltypesorc
+WHERE (((csmallint >= cint)
+        OR ((-89010 >= ctinyint)
+            AND (cdouble > 79.553)))
+       OR ((-563 != cbigint)
+           AND ((ctinyint != cbigint)
+                OR (-3728 >= cdouble))))
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc
+            Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
+            Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterLongScalarGreaterEqualLongColumn(val -89010, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterDoubleScalar(col 5, val 79.553) -> boolean) -> boolean,
FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -563, col 3) -> boolean, FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val -3728.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((UDFToInteger(csmallint) >= cint) or ((-89010 >= UDFToInteger(ctinyint)) and (cdouble > 79.553)) or ((-563 <> cbigint) and ((UDFToLong(ctinyint) <> cbigint) or (-3728.0 >= cdouble)))) (type: boolean) + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double) + outputColumnNames: ctinyint, cint, cdouble + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 2, 5] + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFStdPopDouble(col 5) -> struct, VectorUDAFAvgDouble(col 5) -> struct, VectorUDAFVarPopDouble(col 5) -> struct, VectorUDAFMinLong(col 0) -> tinyint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 5] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
_col0 (type: bigint), (_col0 * -563) (type: bigint), (-3728 + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2)) (type: double), ((-3728 + _col0) - (_col0 * -563)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563) % _col0)) / _col2))) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT SUM(cint), (SUM(cint) * -563), (-3728 + SUM(cint)), diff --git ql/src/test/results/clientpositive/vectorization_5.q.out ql/src/test/results/clientpositive/vectorization_5.q.out index 914a626..33707c7 100644 --- ql/src/test/results/clientpositive/vectorization_5.q.out +++ ql/src/test/results/clientpositive/vectorization_5.q.out @@ -1,3 +1,147 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MAX(csmallint), + (MAX(csmallint) * -75), + COUNT(*), + ((MAX(csmallint) * -75) / COUNT(*)), + (6981 * MAX(csmallint)), + MIN(csmallint), + (-(MIN(csmallint))), + (197 % ((MAX(csmallint) * -75) / COUNT(*))), + SUM(cint), + MAX(ctinyint), + (-(MAX(ctinyint))), + ((-(MAX(ctinyint))) + MAX(ctinyint)) +FROM alltypesorc +WHERE (((cboolean2 IS NOT NULL) + AND (cstring1 LIKE '%b%')) + OR ((ctinyint = cdouble) + AND ((ctimestamp2 IS NOT NULL) + AND (cstring2 LIKE 'a')))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT MAX(csmallint), + (MAX(csmallint) * -75), + COUNT(*), + ((MAX(csmallint) * -75) / COUNT(*)), + (6981 * MAX(csmallint)), + MIN(csmallint), + (-(MIN(csmallint))), + (197 % ((MAX(csmallint) * -75) / COUNT(*))), + SUM(cint), + MAX(ctinyint), + (-(MAX(ctinyint))), + ((-(MAX(ctinyint))) + MAX(ctinyint)) +FROM alltypesorc +WHERE (((cboolean2 IS NOT NULL) + AND (cstring1 LIKE '%b%')) + OR ((ctinyint = cdouble) + AND ((ctimestamp2 IS NOT NULL) + AND (cstring2 LIKE 'a')))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %b%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, SelectColumnIsNotNull(col 9) 
-> boolean, FilterStringColLikeStringScalar(col 7, pattern a) -> boolean) -> boolean) -> boolean + predicate: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean) + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) + outputColumnNames: ctinyint, csmallint, cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 1, 2] + Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1) -> smallint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 1) -> smallint, VectorUDAFSumLong(col 2) -> bigint, VectorUDAFMaxLong(col 0) -> tinyint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3, 4] + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 5, 6, 7, 9, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), max(VALUE._col4) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), (UDFToInteger(_col0) * -75) (type: int), _col1 (type: bigint), (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1)) (type: double), (6981 * UDFToInteger(_col0)) (type: int), _col2 (type: smallint), 
(- _col2) (type: smallint), (197.0 % (UDFToDouble((UDFToInteger(_col0) * -75)) / UDFToDouble(_col1))) (type: double), _col3 (type: bigint), _col4 (type: tinyint), (- _col4) (type: tinyint), ((- _col4) + _col4) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT MAX(csmallint), (MAX(csmallint) * -75), COUNT(*), diff --git ql/src/test/results/clientpositive/vectorization_6.q.out ql/src/test/results/clientpositive/vectorization_6.q.out index 13897f6..9f3da46 100644 --- ql/src/test/results/clientpositive/vectorization_6.q.out +++ ql/src/test/results/clientpositive/vectorization_6.q.out @@ -1,3 +1,111 @@ +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cfloat, + cstring1, + (988888 * csmallint), + (-(csmallint)), + (-(cfloat)), + (-26.28 / cfloat), + (cfloat * 359), + (cint % ctinyint), + (-(cdouble)), + (ctinyint - -75), + (762 * (cint % ctinyint)) +FROM alltypesorc +WHERE ((ctinyint != 0) + AND ((((cboolean1 <= 0) + AND (cboolean2 >= cboolean1)) + OR ((cbigint IS NOT NULL) + AND ((cstring2 LIKE '%a') + OR (cfloat <= -257)))))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT cboolean1, + cfloat, + cstring1, + (988888 * csmallint), + (-(csmallint)), + (-(cfloat)), + (-26.28 / cfloat), + (cfloat * 359), + (cint % ctinyint), + (-(cdouble)), + (ctinyint - -75), + (762 * (cint % ctinyint)) +FROM alltypesorc +WHERE ((ctinyint != 0) + AND ((((cboolean1 <= 0) + AND (cboolean2 >= cboolean1)) + OR ((cbigint IS NOT NULL) + AND ((cstring2 LIKE '%a') + OR (cfloat <= -257)))))) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10, val 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 11, col 10) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 3) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %a) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -257.0) -> boolean) -> boolean) -> boolean) -> boolean) -> boolean + predicate: ((ctinyint <> 0) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean) + Statistics: Num rows: 11605 Data size: 2495116 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28 / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [10, 4, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21] + selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1)(children: col 1) -> 12:long, LongColUnaryMinus(col 1) -> 13:long, DoubleColUnaryMinus(col 4) -> 14:double, DoubleScalarDivideDoubleColumn(val -26.28, col 4)(children: col 4) -> 15:double, DoubleColMultiplyDoubleScalar(col 4, val 359.0) -> 16:double, LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 17:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColSubtractLongScalar(col 0, val -75)(children: col 0) -> 19:long, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 20:long) -> 21:long + Statistics: Num rows: 11605 Data size: 2495116 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 11605 Data size: 2495116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 10, 11] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: bigint, bigint, double, double, double, bigint, double, bigint, bigint, bigint + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT cboolean1, cfloat, cstring1, diff --git ql/src/test/results/clientpositive/vectorization_7.q.out ql/src/test/results/clientpositive/vectorization_7.q.out index c05fee0..218d307 100644 --- ql/src/test/results/clientpositive/vectorization_7.q.out +++ ql/src/test/results/clientpositive/vectorization_7.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -25,7 +25,7 @@ WHERE ((ctinyint != 0) ORDER BY cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 25 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cboolean1, cbigint, csmallint, @@ -105,6 +105,12 @@ STAGE PLANS: allNative: false 
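For vectorization_7.q.out (and vectorization_8 right after it) the only change is switching the test from EXPLAIN VECTORIZATION EXPRESSION to DETAIL, which adds the rowBatchContext block just below: the twelve table columns, the subset the query actually reads (includeColumns), and the scratch columns its expressions allocate. A hedged sketch of the same inspection at the session level, reusing the test's own column list:

EXPLAIN VECTORIZATION DETAIL
SELECT cboolean1, cbigint, csmallint, ctinyint, ctimestamp1, cstring1
FROM alltypesorc
WHERE ctinyint != 0;
-- Look for rowBatchContext under Map Vectorization; in the real test,
-- whose projection computes several expressions, scratchColumnTypeNames
-- lists one entry per computed column.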
usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [0, 1, 2, 3, 5, 6, 7, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint, bigint Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git ql/src/test/results/clientpositive/vectorization_8.q.out ql/src/test/results/clientpositive/vectorization_8.q.out index ce2a4b5..e56fb53 100644 --- ql/src/test/results/clientpositive/vectorization_8.q.out +++ ql/src/test/results/clientpositive/vectorization_8.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -23,7 +23,7 @@ WHERE (((cstring2 IS NOT NULL) ORDER BY ctimestamp1, cdouble, cboolean1, cstring1, cfloat, c1, c2, c3, c4, c5, c6, c7, c8, c9 LIMIT 20 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT ctimestamp1, cdouble, cboolean1, @@ -101,6 +101,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 + scratchColumnTypeNames: double, double, double, double, double, double, double, double, double, double, double Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true diff --git ql/src/test/results/clientpositive/vectorization_9.q.out ql/src/test/results/clientpositive/vectorization_9.q.out index 2e3a34d..930b476 100644 --- ql/src/test/results/clientpositive/vectorization_9.q.out +++ ql/src/test/results/clientpositive/vectorization_9.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: EXPLAIN VECTORIZATION +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -18,7 +18,7 @@ WHERE ((cstring2 LIKE '%b%') OR (cstring1 < 'a'))) GROUP BY cstring1, cdouble, ctimestamp1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN VECTORIZATION +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT cstring1, cdouble, ctimestamp1, @@ -53,15 +53,35 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cdouble (type: 
double), cstring1 (type: string), ctimestamp1 (type: timestamp) outputColumnNames: cdouble, cstring1, ctimestamp1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [5, 6, 8] Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + Group By Vectorization: + aggregators: VectorUDAFCount(col 5) -> bigint, VectorUDAFStdSampDouble(col 5) -> struct, VectorUDAFMinDouble(col 5) -> double + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 5, col 6, col 8 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2] keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 @@ -70,17 +90,27 @@ STAGE PLANS: key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) sort order: +++ Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [5, 6, 7, 8] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -88,6 +118,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 diff --git ql/src/test/results/clientpositive/vectorization_limit.q.out ql/src/test/results/clientpositive/vectorization_limit.q.out index 7381294..b46e6ef 100644 --- ql/src/test/results/clientpositive/vectorization_limit.q.out +++ ql/src/test/results/clientpositive/vectorization_limit.q.out @@ -221,13 +221,14 @@ STAGE PLANS: Group By Operator aggregations: avg(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgDouble(col 12) -> struct + aggregators: VectorUDAFAvgDouble(col 12) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH 
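The removed vectorOutputConditionsNotMet line just below is the heart of this patch: AVG's STRUCT-typed partial result used to force the hash-mode group-by out of vector output (vectorOutput: false), and it now stays vectorized. The exact vectorization_limit query text sits outside this hunk, so the shape below is assumed from the surrounding plan (an average over a computed double, keyed by ctinyint, with a TopN limit):

EXPLAIN VECTORIZATION DETAIL
SELECT ctinyint, AVG(cdouble)
FROM alltypesorc
GROUP BY ctinyint
LIMIT 20;
-- Expect vectorOutput: true on the map-side VectorGroupByOperator and a
-- struct-typed value expression feeding the reduce side, which still
-- merges in row mode (vectorProcessingMode: NONE) under MR.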
projectedOutputColumns: [0] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 12) -> struct output type STRUCT requires PRIMITIVE IS false keys: _col0 (type: tinyint) mode: hash outputColumnNames: _col0, _col1 @@ -236,6 +237,11 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 value expressions: _col1 (type: struct) @@ -243,7 +249,7 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -262,8 +268,10 @@ STAGE PLANS: Group By Operator aggregations: avg(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial @@ -349,9 +357,11 @@ STAGE PLANS: Group By Operator Group By Vectorization: className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0 native: false + vectorProcessingMode: HASH projectedOutputColumns: [] keys: ctinyint (type: tinyint) mode: hash @@ -389,8 +399,10 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial @@ -478,9 +490,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(col 5) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 0, col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: ctinyint (type: tinyint), cdouble (type: double) mode: hash @@ -519,8 +533,10 @@ STAGE PLANS: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial @@ -636,9 +652,11 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 0) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true keyExpressions: col 5 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] keys: cdouble (type: double) mode: hash @@ -677,8 +695,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: double) mode: mergepartial diff --git ql/src/test/results/clientpositive/vectorization_pushdown.q.out ql/src/test/results/clientpositive/vectorization_pushdown.q.out index 664a9ba..183cbdc 100644 --- ql/src/test/results/clientpositive/vectorization_pushdown.q.out +++ 
ql/src/test/results/clientpositive/vectorization_pushdown.q.out @@ -39,7 +39,7 @@ STAGE PLANS: Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false diff --git ql/src/test/results/clientpositive/vectorized_case.q.out ql/src/test/results/clientpositive/vectorized_case.q.out index b1b5e54..0a2e803 100644 --- ql/src/test/results/clientpositive/vectorized_case.q.out +++ ql/src/test/results/clientpositive/vectorized_case.q.out @@ -276,8 +276,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -308,8 +310,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 @@ -387,8 +391,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFSumLong(col 12) -> bigint, VectorUDAFSumLong(col 13) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1] mode: hash outputColumnNames: _col0, _col1 @@ -419,8 +425,10 @@ STAGE PLANS: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index 4248d08..b7ac3f9 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -1239,8 +1239,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFMinLong(col 0) -> date, VectorUDAFMaxLong(col 0) -> date, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -1271,8 +1273,10 @@ STAGE PLANS: Group By Operator aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out index 81292ec..1fe1c69 100644 --- ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out +++ ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out @@ -16,9 +16,11 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dtest POSTHOOK: Lineage: dtest.a SCRIPT [] POSTHOOK: Lineage: dtest.b SIMPLE [] -PREHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest +PREHOOK: query: explain 
vectorization detail +select sum(distinct a), count(distinct a) from dtest PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization select sum(distinct a), count(distinct a) from dtest +POSTHOOK: query: explain vectorization detail +select sum(distinct a), count(distinct a) from dtest POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -35,13 +37,29 @@ STAGE PLANS: TableScan alias: dtest Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1] Select Operator expressions: a (type: int) outputColumnNames: a + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0] Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(DISTINCT a), count(DISTINCT a) bucketGroup: true + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0) -> bigint, VectorUDAFCount(col 0) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 0 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1] keys: a (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -49,6 +67,11 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -59,6 +82,11 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: a:int, b:int + partitionColumnCount: 0 Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -66,6 +94,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -92,9 +126,11 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@dtest #### A masked pattern was here #### 300 1 -PREHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +PREHOOK: query: explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc +POSTHOOK: query: explain vectorization detail +select sum(distinct cint), count(distinct cint), avg(distinct cint), std(distinct cint) from alltypesorc POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -111,12 +147,28 @@ STAGE PLANS: TableScan alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: 
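This second vectorized_distinct_gby plan shows DISTINCT aggregates vectorizing on the map side over alltypesorc; the statement is the test's own query:

EXPLAIN VECTORIZATION DETAIL
SELECT SUM(DISTINCT cint), COUNT(DISTINCT cint), AVG(DISTINCT cint), STD(DISTINCT cint)
FROM alltypesorc;
-- The ReduceSink stays non-native here: "No DISTINCT columns IS false"
-- joins the MR engine check in nativeConditionsNotMet below.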
+ native: true + projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: cint (type: int) outputColumnNames: cint + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [2] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(DISTINCT cint), count(DISTINCT cint), avg(DISTINCT cint), std(DISTINCT cint) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 2) -> bigint, VectorUDAFCount(col 2) -> bigint, VectorUDAFAvgLong(col 2) -> struct, VectorUDAFStdPopLong(col 2) -> struct + className: VectorGroupByOperator + groupByMode: HASH + vectorOutput: true + keyExpressions: col 2 + native: false + vectorProcessingMode: HASH + projectedOutputColumns: [0, 1, 2, 3] keys: cint (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -124,16 +176,26 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No DISTINCT columns IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 12 + includeColumns: [2] + dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean + partitionColumnCount: 0 Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true @@ -141,6 +203,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0) + Group By Vectorization: + groupByMode: MERGEPARTIAL + vectorOutput: false + native: false + vectorProcessingMode: NONE + projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE diff --git ql/src/test/results/clientpositive/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/vectorized_mapjoin.q.out index 32210ad..b915e87 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin.q.out @@ -88,24 +88,30 @@ STAGE PLANS: Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: - aggregators: VectorUDAFCount(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 0) -> int, VectorUDAFAvgLong(col 2) -> struct + aggregators: VectorUDAFCount(col 0) -> bigint, VectorUDAFMaxLong(col 1) -> int, VectorUDAFMinLong(col 0) -> int, VectorUDAFAvgLong(col 2) -> struct className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true 
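vectorized_mapjoin.q.out gets the same AVG fix: with vectorOutput flipped to true above, the struct-typed partial now travels through the non-native VectorReduceSinkOperator as _col3 (type: struct), and groupByVectorOutput turns true for the whole map task. A hedged sketch of the aggregate shape only; the mapjoin query itself is outside this hunk, so the column choices are illustrative:

SET hive.vectorized.execution.reducesink.new.enabled=true;
EXPLAIN VECTORIZATION DETAIL
SELECT COUNT(cint), MAX(cint), MIN(cint), AVG(cint)
FROM alltypesorc;

In vectorized_shufflejoin.q.out further down, the same change removes the old notVectorizedReason about the struct-typed _col3 entirely, letting that map stage run vectorized.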
native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgLong(col 2) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false @@ -120,8 +126,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 diff --git ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out index 52aa05b..5334c16 100644 --- ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out +++ ql/src/test/results/clientpositive/vectorized_mapjoin2.q.out @@ -108,8 +108,10 @@ STAGE PLANS: Group By Vectorization: aggregators: VectorUDAFCount(ConstantVectorExpression(val 1) -> 0:long) -> bigint className: VectorGroupByOperator + groupByMode: HASH vectorOutput: true native: false + vectorProcessingMode: HASH projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 @@ -142,8 +144,10 @@ STAGE PLANS: Group By Operator aggregations: count(VALUE._col0) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: mergepartial outputColumnNames: _col0 diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out index 46e51f7..e096c72 100644 --- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -351,13 +351,14 @@ STAGE PLANS: Group By Operator aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCount(col 5) -> bigint, VectorUDAFAvgDouble(col 3) -> struct, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFMaxDecimal(col 10) -> decimal(4,2) + aggregators: VectorUDAFMaxLong(col 0) -> int, VectorUDAFMinLong(col 2) -> smallint, VectorUDAFCount(col 5) -> bigint, VectorUDAFAvgDouble(col 3) -> struct, VectorUDAFStdPopDouble(col 4) -> struct, VectorUDAFMaxDecimal(col 10) -> decimal(4,2) className: VectorGroupByOperator - vectorOutput: false + groupByMode: HASH + vectorOutput: true keyExpressions: 
col 1 native: false + vectorProcessingMode: HASH projectedOutputColumns: [0, 1, 2, 3, 4, 5] - vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDouble(col 3) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopDouble(col 4) -> struct output type STRUCT requires PRIMITIVE IS false keys: ctinyint (type: tinyint) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -366,13 +367,18 @@ STAGE PLANS: key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: false + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat allNative: false usesVectorUDFAdaptor: false @@ -385,8 +391,10 @@ STAGE PLANS: Group By Operator aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) Group By Vectorization: + groupByMode: MERGEPARTIAL vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null keys: KEY._col0 (type: tinyint) mode: mergepartial diff --git ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out index d42369f..d1d5e55 100644 --- ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out @@ -73,8 +73,10 @@ STAGE PLANS: Group By Operator aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) Group By Vectorization: + groupByMode: HASH vectorOutput: false native: false + vectorProcessingMode: NONE projectedOutputColumns: null mode: hash outputColumnNames: _col0, _col1, _col2, _col3 @@ -90,16 +92,27 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Execution mode: vectorized Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat - notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col3] not 
supported
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -108,8 +121,10 @@ STAGE PLANS:
       Group By Operator
         aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
         Group By Vectorization:
+            groupByMode: MERGEPARTIAL
             vectorOutput: false
             native: false
+            vectorProcessingMode: NONE
             projectedOutputColumns: null
         mode: mergepartial
         outputColumnNames: _col0, _col1, _col2, _col3
diff --git ql/src/test/results/clientpositive/vectorized_timestamp.q.out ql/src/test/results/clientpositive/vectorized_timestamp.q.out
index df8297c..e229215 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp.q.out
@@ -17,10 +17,10 @@ POSTHOOK: query: INSERT INTO TABLE test VALUES ('0001-01-01 00:00:00.000000000')
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@test
 POSTHOOK: Lineage: test.ts EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ts FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ts FROM test
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
@@ -66,15 +66,43 @@ POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999
+PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00
+PREHOOK: query: SELECT ts FROM test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ts FROM test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test
+#### A masked pattern was here ####
+0001-01-01 00:00:00
+9999-12-31 23:59:59.999999999
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -87,29 +115,65 @@ STAGE PLANS:
           TableScan
             alias: test
             Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Select Operator
               expressions: ts (type: timestamp)
               outputColumnNames: ts
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: min(ts), max(ts)
                 Group By Vectorization:
-                    vectorOutput: false
+                    aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    vectorOutput: true
                     native: false
-                    projectedOutputColumns: null
+                    vectorProcessingMode: HASH
+                    projectedOutputColumns: [0, 1]
                 mode: hash
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                   Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: timestamp), _col1 (type: timestamp)
+      Execution mode: vectorized
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
+          inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: [0]
+              dataColumns: ts:timestamp
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
              vectorOutput: false
              native: false
+              vectorProcessingMode: NONE
              projectedOutputColumns: null
          mode: mergepartial
          outputColumnNames: _col0, _col1
@@ -141,15 +205,15 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
-  enabled: false
-  enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
+  enabled: true
+  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -162,78 +226,34 @@ STAGE PLANS:
           TableScan
             alias: test
             Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0]
             Filter Operator
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterTimestampColumnInList(col 0, values [0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0]) -> boolean
               predicate: (ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) (type: boolean)
               Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
               Select Operator
                 expressions: ts (type: timestamp)
                 outputColumnNames: _col0
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumns: [0]
                 Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
-  Stage: Stage-0
-    Fetch Operator
-      limit: -1
-      Processor Tree:
-        ListSink
-
-PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
-PREHOOK: type: QUERY
-PREHOOK: Input: default@test
-#### A masked pattern was here ####
-POSTHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@test
-#### A masked pattern was here ####
-0001-01-01 00:00:00
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT ts FROM test
-PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT ts FROM test
-POSTHOOK: type: QUERY
-PLAN VECTORIZATION:
-  enabled: true
-  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
-
-STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
-
-STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: test
-            Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0]
-            Select Operator
-              expressions: ts (type: timestamp)
-              outputColumnNames: _col0
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [0]
-              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              File Sink Vectorization:
-                  className: VectorFileSinkOperator
-                  native: false
-              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -243,6 +263,11 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: [0]
+              dataColumns: ts:timestamp
+              partitionColumnCount: 0
 
   Stage: Stage-0
     Fetch Operator
@@ -250,21 +275,20 @@
       Processor Tree:
        ListSink
 
-PREHOOK: query: SELECT ts FROM test
+PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT ts FROM test
+POSTHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
 0001-01-01 00:00:00
-9999-12-31 23:59:59.999999999
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
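
The hunks above show the rewritten golden output for the IN query: once the plan is vectorized, the predicate is compiled to a single batch-level expression, FilterTimestampColumnInList(col 0, ...), instead of being evaluated row by row. As a rough illustration of that evaluation model, here is a minimal, self-contained sketch; the class, the method, and the epoch-seconds encoding of the column are assumptions made for brevity, not Hive's actual FilterTimestampColumnInList implementation.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    // Sketch: evaluate an IN-list predicate over a whole column at once,
    // producing a selected-rows index array, as vectorized filters do.
    public class InListFilterSketch {
      static int[] filterInList(long[] column, Set<Long> inList) {
        int[] selected = new int[column.length];
        int size = 0;
        for (int i = 0; i < column.length; i++) {
          if (inList.contains(column[i])) {
            selected[size++] = i; // keep the index of each qualifying row
          }
        }
        return Arrays.copyOf(selected, size);
      }

      public static void main(String[] args) {
        // Hypothetical epoch-second values standing in for the two timestamps.
        long[] ts = { -62135596800L, 253402300799L };
        Set<Long> inList = new HashSet<>(Arrays.asList(-62135596800L, -62101382400L));
        System.out.println(Arrays.toString(filterInList(ts, inList))); // [0]
      }
    }
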
@@ -293,16 +317,18 @@ STAGE PLANS:
                 projectedOutputColumns: [0]
             Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
             Group By Operator
-              aggregations: min(ts), max(ts)
+              aggregations: avg(ts)
               Group By Vectorization:
-                  aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp
+                  aggregators: VectorUDAFAvgTimestamp(col 0) -> struct
                   className: VectorGroupByOperator
+                  groupByMode: HASH
                   vectorOutput: true
                   native: false
-                  projectedOutputColumns: [0, 1]
+                  vectorProcessingMode: HASH
+                  projectedOutputColumns: [0]
               mode: hash
-              outputColumnNames: _col0, _col1
-              Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 sort order: 
                 Reduce Sink Vectorization:
@@ -310,8 +336,8 @@ STAGE PLANS:
                     native: false
                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                     nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col0 (type: timestamp), _col1 (type: timestamp)
+                Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                value expressions: _col0 (type: struct)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -321,27 +347,34 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: [0]
+              dataColumns: ts:timestamp
+              partitionColumnCount: 0
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: min(VALUE._col0), max(VALUE._col1)
+          aggregations: avg(VALUE._col0)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
              vectorOutput: false
              native: false
+              vectorProcessingMode: NONE
              projectedOutputColumns: null
          mode: mergepartial
-          outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: _col0 (type: timestamp), _col1 (type: timestamp), (_col1 - _col0) (type: interval_day_time)
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            expressions: _col0 (type: double), CAST( _col0 AS TIMESTAMP) (type: timestamp)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -353,20 +386,20 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+PREHOOK: query: SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test
+POSTHOOK: query: SELECT AVG(ts), CAST(AVG(ts) AS TIMESTAMP) FROM test
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
-0001-01-01 00:00:00 9999-12-31 23:59:59.999999999 3652060 23:59:59.999999999
-PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
+9.56332944E10 5000-07-01 13:00:00
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
-SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test
 POSTHOOK: type: QUERY
 PLAN VECTORIZATION:
   enabled: true
@@ -386,31 +419,36 @@ STAGE PLANS:
             TableScan Vectorization:
                 native: true
                 projectedOutputColumns: [0]
-            Filter Operator
-              Filter Vectorization:
-                  className: VectorFilterOperator
+            Select Operator
+              expressions: ts (type: timestamp)
+              outputColumnNames: ts
+              Select Vectorization:
+                  className: VectorSelectOperator
                   native: true
-                  predicateExpression: FilterTimestampColumnInList(col 0, values [0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0]) -> boolean
-              predicate: (ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) (type: boolean)
-              Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: ts (type: timestamp)
-                outputColumnNames: _col0
-                Select Vectorization:
-                    className: VectorSelectOperator
-                    native: true
-                    projectedOutputColumns: [0]
-                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  File Sink Vectorization:
-                      className: VectorFileSinkOperator
+                  projectedOutputColumns: [0]
+              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts)
+                Group By Vectorization:
+                    aggregators: VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
                       native: false
-                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+                  Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -420,6 +458,34 @@ STAGE PLANS:
           allNative: false
           usesVectorUDFAdaptor: false
           vectorized: true
+          rowBatchContext:
+              dataColumnCount: 1
+              includeColumns: [0]
+              dataColumns: ts:timestamp
+              partitionColumnCount: 0
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: variance(VALUE._col0), var_pop(VALUE._col1), var_samp(VALUE._col2), std(VALUE._col3), stddev(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6)
+          Group By Vectorization:
+              groupByMode: MERGEPARTIAL
+              vectorOutput: false
+              native: false
+              vectorProcessingMode: NONE
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+          Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -427,12 +493,12 @@
       Processor Tree:
        ListSink
 
-PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
+PREHOOK: query: SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test
 PREHOOK: type: QUERY
 PREHOOK: Input: default@test
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000')
+POSTHOOK: query: SELECT variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) FROM test
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test
 #### A masked pattern was here ####
-0001-01-01 00:00:00
+2.489106846793884E22 2.489106846793884E22 4.978213693587768E22 1.577690352E11 1.577690352E11 1.577690352E11 2.2311910930235822E11
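
The new AVG and variance results above are plain doubles rather than timestamps, which is consistent with timestamps being aggregated on a double seconds-since-epoch scale: AVG(ts) over the two boundary rows comes out near 9.56E10 seconds, and CAST(AVG(ts) AS TIMESTAMP) lands midway through the year 5000. A tiny sketch of that arithmetic follows; the epoch-second constants are approximations chosen for illustration, not values taken from Hive internals.

    // Sketch: averaging timestamps as double seconds since the epoch.
    public class TimestampAvgSketch {
      public static void main(String[] args) {
        double ts1 = -62135596800.0;   // roughly 0001-01-01 00:00:00 UTC
        double ts2 = 253402300799.999; // roughly 9999-12-31 23:59:59.999...
        long count = 2;                // a partial result carries (sum, count)
        double avg = (ts1 + ts2) / count;
        System.out.println(avg);       // ~9.5633E10, matching the magnitude above
      }
    }
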
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index a4536fd..4bb3564 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -731,8 +731,10 @@ STAGE PLANS:
                 Group By Vectorization:
                     aggregators: VectorUDAFMinTimestamp(col 0) -> timestamp, VectorUDAFMaxTimestamp(col 0) -> timestamp, VectorUDAFCount(col 0) -> bigint, VectorUDAFCountStar(*) -> bigint
                     className: VectorGroupByOperator
+                    groupByMode: HASH
                     vectorOutput: true
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3]
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
@@ -763,8 +765,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
              vectorOutput: false
              native: false
+              vectorProcessingMode: NONE
              projectedOutputColumns: null
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
@@ -825,25 +829,48 @@ STAGE PLANS:
           TableScan
             alias: alltypesorc_string
             Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
+            TableScan Vectorization:
+                native: true
+                projectedOutputColumns: [0, 1]
             Select Operator
               expressions: ctimestamp1 (type: timestamp)
               outputColumnNames: ctimestamp1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumns: [0]
               Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
                 aggregations: sum(ctimestamp1)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumTimestamp(col 0) -> double
+                    className: VectorGroupByOperator
+                    groupByMode: HASH
+                    vectorOutput: true
+                    native: false
+                    vectorProcessingMode: HASH
+                    projectedOutputColumns: [0]
                 mode: hash
                 outputColumnNames: _col0
                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: double)
+      Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          notVectorizedReason: Aggregation Function expression for GROUPBY operator: Vectorization of aggreation should have succeeded org.apache.hadoop.hive.ql.metadata.HiveException: Vector aggregate not implemented: "sum" for type: "TIMESTAMP (UDAF evaluator mode = PARTIAL1)
-          vectorized: false
+          allNative: false
+          usesVectorUDFAdaptor: false
+          vectorized: true
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
@@ -852,8 +879,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: sum(VALUE._col0)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
              vectorOutput: false
              native: false
+              vectorProcessingMode: NONE
              projectedOutputColumns: null
          mode: mergepartial
          outputColumnNames: _col0
@@ -940,24 +969,30 @@ STAGE PLANS:
               Group By Operator
                 aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
                 Group By Vectorization:
-                    aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
+                    aggregators: VectorUDAFAvgTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarPopTimestamp(col 0) -> struct, VectorUDAFVarSampTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdPopTimestamp(col 0) -> struct, VectorUDAFStdSampTimestamp(col 0) -> struct
                     className: VectorGroupByOperator
-                    vectorOutput: false
+                    groupByMode: HASH
+                    vectorOutput: true
                     native: false
+                    vectorProcessingMode: HASH
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7]
-                    vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                 Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
+                  Reduce Sink Vectorization:
+                      className: VectorReduceSinkOperator
+                      native: false
+                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                   Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
           enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: false
+          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
           allNative: false
           usesVectorUDFAdaptor: false
@@ -970,8 +1005,10 @@ STAGE PLANS:
         Group By Operator
           aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
           Group By Vectorization:
+              groupByMode: MERGEPARTIAL
              vectorOutput: false
              native: false
+              vectorProcessingMode: NONE
              projectedOutputColumns: null
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
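
A pattern worth noting before the code-generator changes: every Group By Operator in the regenerated plans now reports a groupByMode together with a vectorProcessingMode (HASH with HASH on the vectorized map side, MERGEPARTIAL with NONE on the non-vectorized reduce side here). The mapping below is only a plausible reading of these golden files, written out as a toy; Hive's real decision logic considers more conditions than this.

    // Toy mapping from group-by mode to the processing mode the plans report.
    public class ProcessingModeSketch {
      enum GroupByMode { HASH, MERGEPARTIAL }
      enum VectorProcessingMode { HASH, NONE }

      static VectorProcessingMode processingModeOf(GroupByMode mode, boolean reduceSideVectorized) {
        switch (mode) {
          case HASH:
            return VectorProcessingMode.HASH;  // map-side hash aggregation
          case MERGEPARTIAL:
            // mergepartial runs on the reduce side; not vectorized in these MR plans
            return reduceSideVectorized ? VectorProcessingMode.HASH : VectorProcessingMode.NONE;
          default:
            throw new IllegalArgumentException("unknown mode: " + mode);
        }
      }

      public static void main(String[] args) {
        System.out.println(processingModeOf(GroupByMode.HASH, false));         // HASH
        System.out.println(processingModeOf(GroupByMode.MERGEPARTIAL, false)); // NONE
      }
    }
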
diff --git vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
index 926321e..b87ec55 100644
--- vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
+++ vector-code-gen/src/org/apache/hadoop/hive/tools/GenVectorCode.java
@@ -24,6 +24,11 @@ import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
 import org.apache.tools.ant.BuildException;
 import org.apache.tools.ant.Task;
@@ -1004,51 +1009,144 @@
     {"VectorUDAFMinMaxIntervalDayTime", "VectorUDAFMinIntervalDayTime", ">", "min",
         "_FUNC_(expr) - Returns the minimum value of expr (vectorized, type: interval_day_time)"},
 
-    //template, <ClassName>, <ValueType>
-    {"VectorUDAFSum", "VectorUDAFSumLong", "long"},
-    {"VectorUDAFSum", "VectorUDAFSumDouble", "double"},
-    {"VectorUDAFAvg", "VectorUDAFAvgLong", "long"},
-    {"VectorUDAFAvg", "VectorUDAFAvgDouble", "double"},
+    // Template, <ClassName>, <ValueType>
+    {"VectorUDAFSum", "VectorUDAFSumLong", "long"},
+    {"VectorUDAFSum", "VectorUDAFSumDouble", "double"},
+
+    // Template, <ClassName>, <ValueType>, <IfDefined>
+    {"VectorUDAFAvg", "VectorUDAFAvgLong", "long", "PARTIAL1"},
+    {"VectorUDAFAvg", "VectorUDAFAvgLongComplete", "long", "COMPLETE"},
+
+    {"VectorUDAFAvg", "VectorUDAFAvgDouble", "double", "PARTIAL1"},
+    {"VectorUDAFAvg", "VectorUDAFAvgDoubleComplete", "double", "COMPLETE"},
+
+    {"VectorUDAFAvgDecimal", "VectorUDAFAvgDecimal", "PARTIAL1"},
+    {"VectorUDAFAvgDecimal", "VectorUDAFAvgDecimalComplete", "COMPLETE"},
+
+    {"VectorUDAFAvgTimestamp", "VectorUDAFAvgTimestamp", "PARTIAL1"},
+    {"VectorUDAFAvgTimestamp", "VectorUDAFAvgTimestampComplete", "COMPLETE"},
+
+    //template, <ClassName>, <IfDefined>
+    {"VectorUDAFAvgMerge", "VectorUDAFAvgPartial2", "PARTIAL2"},
+    {"VectorUDAFAvgMerge", "VectorUDAFAvgFinal", "FINAL"},
+
+    {"VectorUDAFAvgDecimalMerge", "VectorUDAFAvgDecimalPartial2", "PARTIAL2"},
+    {"VectorUDAFAvgDecimalMerge", "VectorUDAFAvgDecimalFinal", "FINAL"},
+
+    // (since Timestamps are averaged with double, we don't need a PARTIAL2 class)
+    // (and, since Timestamps are output as double for AVG, we don't need a FINAL class, either)
+    // {"VectorUDAFAvgMerge", "VectorUDAFAvgTimestampPartial2", "PARTIAL2"},
+    // {"VectorUDAFAvgMerge", "VectorUDAFAvgTimestampFinal", "FINAL"},
 
     // template, <ClassName>, <ValueType>, <VarianceFormula>, <DescriptionName>,
     // <DescriptionValue>
-    {"VectorUDAFVar", "VectorUDAFVarPopLong", "long", "myagg.variance / myagg.count",
+    {"VectorUDAFVar", "VectorUDAFVarPopLong", "long", "PARTIAL1", "myagg.variance / myagg.count",
       "variance, var_pop",
       "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, long)"},
-    {"VectorUDAFVar", "VectorUDAFVarPopDouble", "double", "myagg.variance / myagg.count",
+    {"VectorUDAFVar", "VectorUDAFVarPopLongComplete", "long", "COMPLETE,VARIANCE", "myagg.variance / myagg.count",
+      "variance, var_pop",
+      "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, long)"},
+    {"VectorUDAFVar", "VectorUDAFVarPopDouble", "double", "PARTIAL1", "myagg.variance / myagg.count",
       "variance, var_pop",
       "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, double)"},
-    {"VectorUDAFVarDecimal", "VectorUDAFVarPopDecimal", "myagg.variance / myagg.count",
+    {"VectorUDAFVar", "VectorUDAFVarPopDoubleComplete", "double", "COMPLETE,VARIANCE", "myagg.variance / myagg.count",
+      "variance, var_pop",
+      "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, double)"},
+    {"VectorUDAFVarDecimal", "VectorUDAFVarPopDecimal", "PARTIAL1", "myagg.variance / myagg.count",
       "variance, var_pop",
       "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, decimal)"},
-    {"VectorUDAFVar", "VectorUDAFVarSampLong", "long", "myagg.variance / (myagg.count-1.0)",
+    {"VectorUDAFVarDecimal", "VectorUDAFVarPopDecimalComplete", "COMPLETE,VARIANCE", "myagg.variance / myagg.count",
+      "variance, var_pop",
+      "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, decimal)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFVarPopTimestamp", "PARTIAL1", "myagg.variance / myagg.count",
+      "variance, var_pop",
+      "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, timestamp)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFVarPopTimestampComplete", "COMPLETE,VARIANCE", "myagg.variance / myagg.count",
+      "variance, var_pop",
+      "_FUNC_(x) - Returns the variance of a set of numbers (vectorized, timestamp)"},
+
+    {"VectorUDAFVar", "VectorUDAFVarSampLong", "long", "PARTIAL1", "myagg.variance / (myagg.count-1.0)",
       "var_samp",
       "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, long)"},
-    {"VectorUDAFVar", "VectorUDAFVarSampDouble", "double", "myagg.variance / (myagg.count-1.0)",
+    {"VectorUDAFVar", "VectorUDAFVarSampLongComplete", "long", "COMPLETE,VARIANCE_SAMPLE", "myagg.variance / (myagg.count-1.0)",
+      "var_samp",
+      "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, long)"},
+    {"VectorUDAFVar", "VectorUDAFVarSampDouble", "double", "PARTIAL1", "myagg.variance / (myagg.count-1.0)",
       "var_samp",
       "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, double)"},
-    {"VectorUDAFVarDecimal", "VectorUDAFVarSampDecimal", "myagg.variance / (myagg.count-1.0)",
+    {"VectorUDAFVar", "VectorUDAFVarSampDoubleComplete", "double", "COMPLETE,VARIANCE_SAMPLE", "myagg.variance / (myagg.count-1.0)",
+      "var_samp",
+      "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, double)"},
+    {"VectorUDAFVarDecimal", "VectorUDAFVarSampDecimal", "PARTIAL1", "myagg.variance / (myagg.count-1.0)",
       "var_samp",
       "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, decimal)"},
-    {"VectorUDAFVar", "VectorUDAFStdPopLong", "long",
+    {"VectorUDAFVarDecimal", "VectorUDAFVarSampDecimalComplete", "COMPLETE,VARIANCE_SAMPLE", "myagg.variance / (myagg.count-1.0)",
+      "var_samp",
+      "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, decimal)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFVarSampTimestamp", "PARTIAL1", "myagg.variance / (myagg.count-1.0)",
+      "var_samp",
+      "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, timestamp)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFVarSampTimestampComplete", "COMPLETE,VARIANCE_SAMPLE", "myagg.variance / (myagg.count-1.0)",
+      "var_samp",
+      "_FUNC_(x) - Returns the sample variance of a set of numbers (vectorized, timestamp)"},
+
+    {"VectorUDAFVar", "VectorUDAFStdPopLong", "long", "PARTIAL1",
       "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
       "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, long)"},
-    {"VectorUDAFVar", "VectorUDAFStdPopDouble", "double",
+    {"VectorUDAFVar", "VectorUDAFStdPopLongComplete", "long", "COMPLETE,STD",
+      "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
+      "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, long)"},
+    {"VectorUDAFVar", "VectorUDAFStdPopDouble", "double", "PARTIAL1",
       "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
       "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, double)"},
-    {"VectorUDAFVarDecimal", "VectorUDAFStdPopDecimal",
+    {"VectorUDAFVar", "VectorUDAFStdPopDoubleComplete", "double", "COMPLETE,STD",
+      "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
+      "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, double)"},
+    {"VectorUDAFVarDecimal", "VectorUDAFStdPopDecimal", "PARTIAL1",
       "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
       "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, decimal)"},
-    {"VectorUDAFVar", "VectorUDAFStdSampLong", "long",
+    {"VectorUDAFVarDecimal", "VectorUDAFStdPopDecimalComplete", "COMPLETE,STD",
+      "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
+      "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, decimal)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFStdPopTimestamp", "PARTIAL1",
+      "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
+      "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, timestamp)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFStdPopTimestampComplete", "COMPLETE,STD",
+      "Math.sqrt(myagg.variance / (myagg.count))", "std,stddev,stddev_pop",
+      "_FUNC_(x) - Returns the standard deviation of a set of numbers (vectorized, timestamp)"},
+
+    {"VectorUDAFVar", "VectorUDAFStdSampLong", "long", "PARTIAL1",
       "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
       "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, long)"},
-    {"VectorUDAFVar", "VectorUDAFStdSampDouble", "double",
+    {"VectorUDAFVar", "VectorUDAFStdSampLongComplete", "long", "COMPLETE,STD_SAMPLE",
+      "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
+      "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, long)"},
+    {"VectorUDAFVar", "VectorUDAFStdSampDouble", "double", "PARTIAL1",
       "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
       "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, double)"},
-    {"VectorUDAFVarDecimal", "VectorUDAFStdSampDecimal",
+    {"VectorUDAFVar", "VectorUDAFStdSampDoubleComplete", "double", "COMPLETE,STD_SAMPLE",
+      "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
+      "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, double)"},
+    {"VectorUDAFVarDecimal", "VectorUDAFStdSampDecimal", "PARTIAL1",
       "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
       "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, decimal)"},
-
+    {"VectorUDAFVarDecimal", "VectorUDAFStdSampDecimalComplete", "COMPLETE,STD_SAMPLE",
+      "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
+      "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, decimal)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFStdSampTimestamp", "PARTIAL1",
+      "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
+      "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, timestamp)"},
+    {"VectorUDAFVarTimestamp", "VectorUDAFStdSampTimestampComplete", "COMPLETE,STD_SAMPLE",
+      "Math.sqrt(myagg.variance / (myagg.count-1.0))", "stddev_samp",
+      "_FUNC_(x) - Returns the sample standard deviation of a set of numbers (vectorized, timestamp)"},
+
+    //template, <ClassName>, <IfDefined>
+    {"VectorUDAFVarMerge", "VectorUDAFVarPartial2", "PARTIAL2"},
+
+    {"VectorUDAFVarMerge", "VectorUDAFVarPopFinal", "FINAL,VARIANCE"},
+    {"VectorUDAFVarMerge", "VectorUDAFVarSampFinal", "FINAL,VARIANCE_SAMPLE"},
+    {"VectorUDAFVarMerge", "VectorUDAFStdPopFinal", "FINAL,STD"},
+    {"VectorUDAFVarMerge", "VectorUDAFStdSampFinal", "FINAL,STD_SAMPLE"},
   };
@@ -1204,10 +1302,22 @@ private void generate() throws Exception {
       generateVectorUDAFSum(tdesc);
     } else if (tdesc[0].equals("VectorUDAFAvg")) {
       generateVectorUDAFAvg(tdesc);
+    } else if (tdesc[0].equals("VectorUDAFAvgMerge")) {
+      generateVectorUDAFAvgMerge(tdesc);
+    } else if (tdesc[0].equals("VectorUDAFAvgDecimal")) {
+      generateVectorUDAFAvgObject(tdesc);
+    } else if (tdesc[0].equals("VectorUDAFAvgTimestamp")) {
+      generateVectorUDAFAvgObject(tdesc);
+    } else if (tdesc[0].equals("VectorUDAFAvgDecimalMerge")) {
+      generateVectorUDAFAvgMerge(tdesc);
     } else if (tdesc[0].equals("VectorUDAFVar")) {
       generateVectorUDAFVar(tdesc);
     } else if (tdesc[0].equals("VectorUDAFVarDecimal")) {
-      generateVectorUDAFVarDecimal(tdesc);
+      generateVectorUDAFVarObject(tdesc);
+    } else if (tdesc[0].equals("VectorUDAFVarTimestamp")) {
+      generateVectorUDAFVarObject(tdesc);
+    } else if (tdesc[0].equals("VectorUDAFVarMerge")) {
+      generateVectorUDAFVarMerge(tdesc);
     } else if (tdesc[0].equals("FilterStringGroupColumnCompareStringGroupScalarBase")) {
       generateFilterStringGroupColumnCompareStringGroupScalarBase(tdesc);
     } else if (tdesc[0].equals("FilterStringGroupColumnCompareStringScalar")) {
@@ -1565,14 +1675,50 @@ private void generateVectorUDAFSum(String[] tdesc) throws Exception {
   private void generateVectorUDAFAvg(String[] tdesc) throws Exception {
     String className = tdesc[1];
     String valueType = tdesc[2];
+    String camelValueCaseType = getCamelCaseType(valueType);
     String columnType = getColumnVectorType(valueType);
+    String ifDefined = tdesc[3];
 
     File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
 
     String templateString = readFile(templateFile);
     templateString = templateString.replaceAll("<ClassName>", className);
     templateString = templateString.replaceAll("<ValueType>", valueType);
+    templateString = templateString.replaceAll("<CamelCaseType>", camelValueCaseType);
     templateString = templateString.replaceAll("<InputColumnVectorType>", columnType);
+
+    templateString = evaluateIfDefined(templateString, ifDefined);
+
+    writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateVectorUDAFAvgMerge(String[] tdesc) throws Exception {
+    String className = tdesc[1];
+    String groupByMode = tdesc[2];
+
+    File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
+
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+
+    templateString = evaluateIfDefined(templateString, groupByMode);
+
+    writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateVectorUDAFAvgObject(String[] tdesc) throws Exception {
+    String className = tdesc[1];
+    String ifDefined = tdesc[2];
+
+    File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
+
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+
+    templateString = evaluateIfDefined(templateString, ifDefined);
+
     writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
         className, templateString);
   }
@@ -1580,9 +1726,10 @@ private void generateVectorUDAFAvg(String[] tdesc) throws Exception {
   private void generateVectorUDAFVar(String[] tdesc) throws Exception {
     String className = tdesc[1];
     String valueType = tdesc[2];
-    String varianceFormula = tdesc[3];
-    String descriptionName = tdesc[4];
-    String descriptionValue = tdesc[5];
+    String ifDefined = tdesc[3];
+    String varianceFormula = tdesc[4];
+    String descriptionName = tdesc[5];
+    String descriptionValue = tdesc[6];
     String columnType = getColumnVectorType(valueType);
 
     File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
@@ -1594,26 +1741,48 @@ private void generateVectorUDAFVar(String[] tdesc) throws Exception {
     templateString = templateString.replaceAll("<VarianceFormula>", varianceFormula);
     templateString = templateString.replaceAll("<DescriptionName>", descriptionName);
     templateString = templateString.replaceAll("<DescriptionValue>", descriptionValue);
+
+    templateString = evaluateIfDefined(templateString, ifDefined);
+
     writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
         className, templateString);
   }
 
-  private void generateVectorUDAFVarDecimal(String[] tdesc) throws Exception {
-    String className = tdesc[1];
-    String varianceFormula = tdesc[2];
-    String descriptionName = tdesc[3];
-    String descriptionValue = tdesc[4];
+  private void generateVectorUDAFVarObject(String[] tdesc) throws Exception {
+    String className = tdesc[1];
+    String ifDefined = tdesc[2];
+    String varianceFormula = tdesc[3];
+    String descriptionName = tdesc[4];
+    String descriptionValue = tdesc[5];
 
-    File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
+    File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
 
-    String templateString = readFile(templateFile);
-    templateString = templateString.replaceAll("<ClassName>", className);
-    templateString = templateString.replaceAll("<VarianceFormula>", varianceFormula);
-    templateString = templateString.replaceAll("<DescriptionName>", descriptionName);
-    templateString = templateString.replaceAll("<DescriptionValue>", descriptionValue);
-    writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
-        className, templateString);
-  }
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+    templateString = templateString.replaceAll("<VarianceFormula>", varianceFormula);
+    templateString = templateString.replaceAll("<DescriptionName>", descriptionName);
+    templateString = templateString.replaceAll("<DescriptionValue>", descriptionValue);
+
+    templateString = evaluateIfDefined(templateString, ifDefined);
+
+    writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
+        className, templateString);
+  }
+
+  private void generateVectorUDAFVarMerge(String[] tdesc) throws Exception {
+    String className = tdesc[1];
+    String groupByMode = tdesc[2];
+
+    File templateFile = new File(joinPath(this.udafTemplateDirectory, tdesc[0] + ".txt"));
+
+    String templateString = readFile(templateFile);
+    templateString = templateString.replaceAll("<ClassName>", className);
+
+    templateString = evaluateIfDefined(templateString, groupByMode);
+
+    writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory,
+        className, templateString);
+  }
 
   private void generateFilterStringGroupScalarCompareStringGroupColumnBase(String[] tdesc)
       throws IOException {
     String operatorName = tdesc[1];
@@ -3126,6 +3295,102 @@ private static boolean isTimestampIntervalType(String type) {
         || type.equals("interval_day_time"));
   }
 
+  private boolean containsDefinedStrings(Set<String> defineSet, String commaDefinedString) {
+    String[] definedStrings = commaDefinedString.split(",");
+    boolean result = false;
+    for (String definedString : definedStrings) {
+      if (defineSet.contains(definedString)) {
+        result = true;
+        break;
+      }
+    }
+    return result;
+  }
+
+  private int doIfDefinedStatement(String[] lines, int index, Set<String> definedSet,
+      boolean outerInclude, StringBuilder sb) {
+    String ifLine = lines[index];
+    final int ifLineNumber = index + 1;
+    String commaDefinedString = ifLine.substring("#IF ".length());
+    boolean includeBody = containsDefinedStrings(definedSet, commaDefinedString);
+    index++;
+    final int end = lines.length;
+    while (true) {
+      if (index >= end) {
+        throw new RuntimeException(
+            "Unmatched #IF at line " + ifLineNumber + " for " + commaDefinedString);
+      }
+      String line = lines[index];
+      if (line.length() == 0 || line.charAt(0) != '#') {
+        if (outerInclude && includeBody) {
+          sb.append(line);
+          sb.append("\n");
+        }
+        index++;
+        continue;
+      }
+
+      // A pound # statement (IF/ELSE/ENDIF).
+      if (line.startsWith("#IF ")) {
+        // Recurse.
+        index = doIfDefinedStatement(lines, index, definedSet, outerInclude && includeBody, sb);
+      } else if (line.equals("#ELSE")) {
+        // Flip inclusion.
+        includeBody = !includeBody;
+        index++;
+      } else if (line.equals("#ENDIF")) {
+        throw new RuntimeException("Missing defined strings with #ENDIF on line " + (index + 1));
+      } else if (line.startsWith("#ENDIF ")) {
+        String endCommaDefinedString = line.substring("#ENDIF ".length());
+        if (!commaDefinedString.equals(endCommaDefinedString)) {
+          throw new RuntimeException(
+              "#ENDIF defined names \"" + endCommaDefinedString + "\" (line " + (index + 1) +
+              ") do not match \"" + commaDefinedString + "\" (line " + ifLineNumber + ")");
+        }
+        return ++index;
+      } else {
+        throw new RuntimeException(
+            "Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line);
+      }
+    }
+  }
+
+  private void doEvaluateIfDefined(String[] lines, int index, Set<String> definedSet,
+      boolean outerInclude, StringBuilder sb) {
+    final int end = lines.length;
+    while (true) {
+      if (index >= end) {
+        break;
+      }
+      String line = lines[index];
+      if (line.length() == 0 || line.charAt(0) != '#') {
+        if (outerInclude) {
+          sb.append(line);
+          sb.append("\n");
+        }
+        index++;
+        continue;
+      }
+
+      // A pound # statement (IF/ELSE/ENDIF).
+      if (line.startsWith("#IF ")) {
+        index = doIfDefinedStatement(lines, index, definedSet, outerInclude, sb);
+      } else {
+        throw new RuntimeException(
+            "Problem with #IF/#ELSE/#ENDIF on line " + (index + 1) + ": " + line);
+      }
+    }
+  }
+
+  private String evaluateIfDefined(String linesString, List<String> definedList) {
+    String[] lines = linesString.split("\n");
+    Set<String> definedSet = new HashSet<String>(definedList);
+    StringBuilder sb = new StringBuilder();
+    doEvaluateIfDefined(lines, 0, definedSet, true, sb);
+    return sb.toString();
+  }
+
+  private String evaluateIfDefined(String linesString, String definedString) {
+    return evaluateIfDefined(linesString, Arrays.asList(definedString.split(",")));
+  }
+
   static void writeFile(long templateTime, String outputDir, String classesDir,
       String className, String str) throws IOException {
     File outputFile = new File(outputDir, className + ".java");
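
To make the semantics of the helpers above concrete, here is a self-contained usage sketch. It re-implements the include logic in simplified form (single level, no #ELSE) so that it compiles on its own; the class and method names are illustrative and not part of the patch.

    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    public class IfDefinedDemo {
      // Simplified, non-nesting variant of evaluateIfDefined for illustration.
      static String evaluate(String template, String defines) {
        Set<String> defined = new HashSet<>(Arrays.asList(defines.split(",")));
        StringBuilder sb = new StringBuilder();
        boolean include = true;
        for (String line : template.split("\n")) {
          if (line.startsWith("#IF ")) {
            include = false; // include the body if any listed name is defined
            for (String name : line.substring("#IF ".length()).split(",")) {
              if (defined.contains(name)) {
                include = true;
                break;
              }
            }
          } else if (line.startsWith("#ENDIF")) {
            include = true;  // leave the guarded region
          } else if (include) {
            sb.append(line).append('\n');
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) {
        String template = "#IF PARTIAL1\npartial body\n#ENDIF PARTIAL1\n"
            + "#IF COMPLETE\ncomplete body\n#ENDIF COMPLETE";
        System.out.println(evaluate(template, "COMPLETE")); // prints: complete body
      }
    }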