diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java new file mode 100644 index 0000000000..9298e51ebf --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.functions; + +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlReturnTypeInference; + +/** + * Aggregation function to represent: stddev_pop, stddev_samp, var_pop, var_samp. + */ +public class HiveSqlVarianceAggFunction extends SqlAggFunction { + + public HiveSqlVarianceAggFunction(String name, SqlKind kind, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) { + super(name, null, kind, returnTypeInference, operandTypeInference, + operandTypeChecker, SqlFunctionCategory.NUMERIC, false, false); + assert kind == SqlKind.STDDEV_POP || kind == SqlKind.STDDEV_SAMP || + kind == SqlKind.VAR_POP || kind == SqlKind.VAR_SAMP; + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java new file mode 100644 index 0000000000..fb65ce180c --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.sql.type.SqlTypeUtil; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.CompositeList; +import org.apache.calcite.util.ImmutableIntList; +import org.apache.calcite.util.Util; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; + +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +/** + * This rule is a copy of {@link org.apache.calcite.rel.rules.AggregateReduceFunctionsRule} + * that regenerates Hive specific aggregate operators. + * + * TODO: When CALCITE-2216 is completed, we should be able to remove much of this code and + * just override the relevant methods. + * + * Planner rule that reduces aggregate functions in + * {@link org.apache.calcite.rel.core.Aggregate}s to simpler forms. + * + *

Rewrites:
+ * <ul>
+ * <li>AVG(x) => SUM(x) / COUNT(x)</li>
+ * <li>STDDEV_POP(x) => SQRT((SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) / COUNT(x))</li>
+ * <li>STDDEV_SAMP(x) => SQRT((SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END)</li>
+ * <li>VAR_POP(x) => (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) / COUNT(x)</li>
+ * <li>VAR_SAMP(x) => (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END</li>
+ * </ul>
+ *

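+ * <p>For example (illustrative only): avg(x) in
+ * "SELECT key, avg(value) FROM t GROUP BY key" is replaced by
+ * sum(value) / count(value); SUM and COUNT become the remaining aggregate
+ * calls and the division is computed in a Project added on top of the
+ * Aggregate.
+ *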
+ */ +public class HiveAggregateReduceFunctionsRule extends RelOptRule { + //~ Static fields/initializers --------------------------------------------- + + /** The singleton. */ + public static final HiveAggregateReduceFunctionsRule INSTANCE = + new HiveAggregateReduceFunctionsRule(); + + //~ Constructors ----------------------------------------------------------- + + /** Creates an HiveAggregateReduceFunctionsRule. */ + public HiveAggregateReduceFunctionsRule() { + super(operand(HiveAggregate.class, any()), + HiveRelFactories.HIVE_BUILDER, null); + } + + //~ Methods ---------------------------------------------------------------- + + @Override public boolean matches(RelOptRuleCall call) { + if (!super.matches(call)) { + return false; + } + Aggregate oldAggRel = (Aggregate) call.rels[0]; + return containsAvgStddevVarCall(oldAggRel.getAggCallList()); + } + + public void onMatch(RelOptRuleCall ruleCall) { + Aggregate oldAggRel = (Aggregate) ruleCall.rels[0]; + reduceAggs(ruleCall, oldAggRel); + } + + /** + * Returns whether any of the aggregates are calls to AVG, STDDEV_*, VAR_*. + * + * @param aggCallList List of aggregate calls + */ + private boolean containsAvgStddevVarCall(List aggCallList) { + for (AggregateCall call : aggCallList) { + if (isReducible(call.getAggregation().getKind())) { + return true; + } + } + return false; + } + + /** + * Returns whether the aggregate call is a reducible function + */ + private boolean isReducible(final SqlKind kind) { + if (SqlKind.AVG_AGG_FUNCTIONS.contains(kind)) { + return true; + } + return false; + } + + /** + * Reduces all calls to AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in + * the aggregates list to. + * + *

It handles newly generated common subexpressions since this was done + * at the sql2rel stage. + */ + private void reduceAggs( + RelOptRuleCall ruleCall, + Aggregate oldAggRel) { + RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder(); + + List oldCalls = oldAggRel.getAggCallList(); + final int groupCount = oldAggRel.getGroupCount(); + final int indicatorCount = oldAggRel.getIndicatorCount(); + + final List newCalls = Lists.newArrayList(); + final Map aggCallMapping = Maps.newHashMap(); + + final List projList = Lists.newArrayList(); + + // pass through group key (+ indicators if present) + for (int i = 0; i < groupCount + indicatorCount; ++i) { + projList.add( + rexBuilder.makeInputRef( + getFieldType(oldAggRel, i), + i)); + } + + // List of input expressions. If a particular aggregate needs more, it + // will add an expression to the end, and we will create an extra + // project. + final RelBuilder relBuilder = ruleCall.builder(); + relBuilder.push(oldAggRel.getInput()); + final List inputExprs = new ArrayList<>(relBuilder.fields()); + + // create new agg function calls and rest of project list together + for (AggregateCall oldCall : oldCalls) { + projList.add( + reduceAgg( + oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs)); + } + + final int extraArgCount = + inputExprs.size() - relBuilder.peek().getRowType().getFieldCount(); + if (extraArgCount > 0) { + relBuilder.project(inputExprs, + CompositeList.of( + relBuilder.peek().getRowType().getFieldNames(), + Collections.nCopies(extraArgCount, null))); + } + newAggregateRel(relBuilder, oldAggRel, newCalls); + relBuilder.project(projList, oldAggRel.getRowType().getFieldNames()) + .convert(oldAggRel.getRowType(), false); + ruleCall.transformTo(relBuilder.build()); + } + + private RexNode reduceAgg( + Aggregate oldAggRel, + AggregateCall oldCall, + List newCalls, + Map aggCallMapping, + List inputExprs) { + final SqlKind kind = oldCall.getAggregation().getKind(); + if (isReducible(kind)) { + switch (kind) { + case AVG: + // replace original AVG(x) with SUM(x) / COUNT(x) + return reduceAvg(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs); + case STDDEV_POP: + // replace original STDDEV_POP(x) with + // SQRT( + // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) + // / COUNT(x)) + return reduceStddev(oldAggRel, oldCall, true, true, newCalls, + aggCallMapping, inputExprs); + case STDDEV_SAMP: + // replace original STDDEV_SAMP(x) with + // SQRT( + // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) + // / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END) + return reduceStddev(oldAggRel, oldCall, false, true, newCalls, + aggCallMapping, inputExprs); + case VAR_POP: + // replace original VAR_POP(x) with + // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) + // / COUNT(x) + return reduceStddev(oldAggRel, oldCall, true, false, newCalls, + aggCallMapping, inputExprs); + case VAR_SAMP: + // replace original VAR_SAMP(x) with + // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) + // / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END + return reduceStddev(oldAggRel, oldCall, false, false, newCalls, + aggCallMapping, inputExprs); + default: + throw Util.unexpected(kind); + } + } else { + // anything else: preserve original call + RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder(); + final int nGroups = oldAggRel.getGroupCount(); + List oldArgTypes = + SqlTypeUtil.projectTypes( + oldAggRel.getInput().getRowType(), oldCall.getArgList()); + return rexBuilder.addAggCall(oldCall, + nGroups, + oldAggRel.indicator, + newCalls, + 
aggCallMapping, + oldArgTypes); + } + } + + private AggregateCall createAggregateCallWithBinding( + RelDataTypeFactory typeFactory, + SqlAggFunction aggFunction, + RelDataType operandType, + Aggregate oldAggRel, + AggregateCall oldCall, + int argOrdinal) { + final Aggregate.AggCallBinding binding = + new Aggregate.AggCallBinding(typeFactory, aggFunction, + ImmutableList.of(operandType), oldAggRel.getGroupCount(), + oldCall.filterArg >= 0); + return AggregateCall.create(aggFunction, + oldCall.isDistinct(), + oldCall.isApproximate(), + ImmutableIntList.of(argOrdinal), + oldCall.filterArg, + aggFunction.inferReturnType(binding), + null); + } + + private RexNode reduceAvg( + Aggregate oldAggRel, + AggregateCall oldCall, + List newCalls, + Map aggCallMapping, + List inputExprs) { + final int nGroups = oldAggRel.getGroupCount(); + final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder(); + final RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory(); + final int iAvgInput = oldCall.getArgList().get(0); + RelDataType avgInputType = typeFactory.createTypeWithNullability( + getFieldType(oldAggRel.getInput(), iAvgInput), true); + final AggregateCall sumCall = + AggregateCall.create( + new HiveSqlSumAggFunction( + oldCall.isDistinct(), + oldCall.getAggregation().getReturnTypeInference(), + oldCall.getAggregation().getOperandTypeInference(), + oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM, + oldCall.isDistinct(), + oldCall.isApproximate(), + oldCall.getArgList(), + oldCall.filterArg, + oldAggRel.getGroupCount(), + oldAggRel.getInput(), + null, + null); + RelDataType countRetType = typeFactory.createTypeWithNullability( + typeFactory.createSqlType(SqlTypeName.BIGINT), true); + final AggregateCall countCall = + AggregateCall.create( + new HiveSqlCountAggFunction( + oldCall.isDistinct(), + ReturnTypes.explicit(countRetType), + oldCall.getAggregation().getOperandTypeInference(), + oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT, + oldCall.isDistinct(), + oldCall.isApproximate(), + oldCall.getArgList(), + oldCall.filterArg, + oldAggRel.getGroupCount(), + oldAggRel.getInput(), + countRetType, + null); + + // NOTE: these references are with respect to the output + // of newAggRel + RexNode numeratorRef = + rexBuilder.addAggCall(sumCall, + nGroups, + oldAggRel.indicator, + newCalls, + aggCallMapping, + ImmutableList.of(avgInputType)); + final RexNode denominatorRef = + rexBuilder.addAggCall(countCall, + nGroups, + oldAggRel.indicator, + newCalls, + aggCallMapping, + ImmutableList.of(avgInputType)); + + numeratorRef = rexBuilder.ensureType(oldCall.getType(), numeratorRef, true); + final RexNode divideRef = + rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, numeratorRef, denominatorRef); + return rexBuilder.makeCast(oldCall.getType(), divideRef); + } + + private RexNode reduceStddev( + Aggregate oldAggRel, + AggregateCall oldCall, + boolean biased, + boolean sqrt, + List newCalls, + Map aggCallMapping, + List inputExprs) { + // stddev_pop(x) ==> + // power( + // (sum(x * x) - sum(x) * sum(x) / count(x)) + // / count(x), + // .5) + // + // stddev_samp(x) ==> + // power( + // (sum(x * x) - sum(x) * sum(x) / count(x)) + // / nullif(count(x) - 1, 0), + // .5) + final int nGroups = oldAggRel.getGroupCount(); + final RelOptCluster cluster = oldAggRel.getCluster(); + final RexBuilder rexBuilder = cluster.getRexBuilder(); + final RelDataTypeFactory typeFactory = cluster.getTypeFactory(); + + assert oldCall.getArgList().size() == 
1 : oldCall.getArgList(); + final int argOrdinal = oldCall.getArgList().get(0); + final RelDataType argOrdinalType = getFieldType(oldAggRel.getInput(), argOrdinal); + final RelDataType oldCallType = + typeFactory.createTypeWithNullability(oldCall.getType(), true); + + final RexNode argRef = + rexBuilder.ensureType(oldCallType, inputExprs.get(argOrdinal), false); + final int argRefOrdinal = lookupOrAdd(inputExprs, argRef); + + final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, + argRef, argRef); + final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared); + + final AggregateCall sumArgSquaredAggCall = + createAggregateCallWithBinding(typeFactory, + new HiveSqlSumAggFunction( + oldCall.isDistinct(), + oldCall.getAggregation().getReturnTypeInference(), + oldCall.getAggregation().getOperandTypeInference(), + oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM, + argSquared.getType(), oldAggRel, oldCall, argSquaredOrdinal); + + final RexNode sumArgSquared = + rexBuilder.addAggCall(sumArgSquaredAggCall, + nGroups, + oldAggRel.indicator, + newCalls, + aggCallMapping, + ImmutableList.of(sumArgSquaredAggCall.getType())); + + final AggregateCall sumArgAggCall = + AggregateCall.create( + new HiveSqlSumAggFunction( + oldCall.isDistinct(), + oldCall.getAggregation().getReturnTypeInference(), + oldCall.getAggregation().getOperandTypeInference(), + oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM, + oldCall.isDistinct(), + oldCall.isApproximate(), + ImmutableIntList.of(argRefOrdinal), + oldCall.filterArg, + oldAggRel.getGroupCount(), + oldAggRel.getInput(), + null, + null); + + final RexNode sumArg = + rexBuilder.addAggCall(sumArgAggCall, + nGroups, + oldAggRel.indicator, + newCalls, + aggCallMapping, + ImmutableList.of(sumArgAggCall.getType())); + final RexNode sumArgCast = rexBuilder.ensureType(oldCallType, sumArg, true); + final RexNode sumSquaredArg = + rexBuilder.makeCall( + SqlStdOperatorTable.MULTIPLY, sumArgCast, sumArgCast); + + RelDataType countRetType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true); + final AggregateCall countArgAggCall = + AggregateCall.create( + new HiveSqlCountAggFunction( + oldCall.isDistinct(), + ReturnTypes.explicit(countRetType), + oldCall.getAggregation().getOperandTypeInference(), + oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT, + oldCall.isDistinct(), + oldCall.isApproximate(), + oldCall.getArgList(), + oldCall.filterArg, + oldAggRel.getGroupCount(), + oldAggRel.getInput(), + countRetType, + null); + + final RexNode countArg = + rexBuilder.addAggCall(countArgAggCall, + nGroups, + oldAggRel.indicator, + newCalls, + aggCallMapping, + ImmutableList.of(argOrdinalType)); + + final RexNode avgSumSquaredArg = + rexBuilder.makeCall( + SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg); + + final RexNode diff = + rexBuilder.makeCall( + SqlStdOperatorTable.MINUS, + sumArgSquared, avgSumSquaredArg); + + final RexNode denominator; + if (biased) { + denominator = countArg; + } else { + final RexLiteral one = + rexBuilder.makeExactLiteral(BigDecimal.ONE); + final RexNode nul = + rexBuilder.makeCast(countArg.getType(), rexBuilder.constantNull()); + final RexNode countMinusOne = + rexBuilder.makeCall( + SqlStdOperatorTable.MINUS, countArg, one); + final RexNode countEqOne = + rexBuilder.makeCall( + SqlStdOperatorTable.EQUALS, countArg, one); + denominator = + rexBuilder.makeCall( + SqlStdOperatorTable.CASE, + 
countEqOne, nul, countMinusOne); + } + + final RexNode div = + rexBuilder.makeCall( + SqlStdOperatorTable.DIVIDE, diff, denominator); + + RexNode result = div; + if (sqrt) { + final RexNode half = + rexBuilder.makeExactLiteral(new BigDecimal("0.5")); + result = + rexBuilder.makeCall( + SqlStdOperatorTable.POWER, div, half); + } + + return rexBuilder.makeCast( + oldCall.getType(), result); + } + + /** + * Finds the ordinal of an element in a list, or adds it. + * + * @param list List + * @param element Element to lookup or add + * @return Ordinal of element in list + */ + private static int lookupOrAdd(List list, RexNode element) { + for (int ordinal = 0; ordinal < list.size(); ordinal++) { + if (list.get(ordinal).toString().equals(element.toString())) { + return ordinal; + } + } + list.add(element); + return list.size() - 1; + } + + /** + * Do a shallow clone of oldAggRel and update aggCalls. Could be refactored + * into Aggregate and subclasses - but it's only needed for some + * subclasses. + * + * @param relBuilder Builder of relational expressions; at the top of its + * stack is its input + * @param oldAggregate LogicalAggregate to clone. + * @param newCalls New list of AggregateCalls + */ + protected void newAggregateRel(RelBuilder relBuilder, + Aggregate oldAggregate, List newCalls) { + relBuilder.aggregate( + relBuilder.groupKey(oldAggregate.getGroupSet(), + oldAggregate.getGroupSets()), + newCalls); + } + + private RelDataType getFieldType(RelNode relNode, int i) { + final RelDataTypeField inputField = + relNode.getRowType().getFieldList().get(i); + return inputField.getType(); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index cb0c2b1b35..950abe16f4 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlVarianceAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate; @@ -577,48 +578,82 @@ public static SqlAggFunction getCalciteAggFn(String hiveUdfName, boolean isDisti CalciteUDFInfo udfInfo = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); switch (hiveUdfName.toLowerCase()) { - case "sum": - calciteAggFn = new HiveSqlSumAggFunction( - isDistinct, - udfInfo.returnTypeInference, - udfInfo.operandTypeInference, - udfInfo.operandTypeChecker); - break; - case "count": - calciteAggFn = new HiveSqlCountAggFunction( - isDistinct, - udfInfo.returnTypeInference, - udfInfo.operandTypeInference, - udfInfo.operandTypeChecker); - break; - case "min": - calciteAggFn = new HiveSqlMinMaxAggFunction( - udfInfo.returnTypeInference, - udfInfo.operandTypeInference, - udfInfo.operandTypeChecker, true); - break; - case "max": - calciteAggFn = new HiveSqlMinMaxAggFunction( - udfInfo.returnTypeInference, - udfInfo.operandTypeInference, - udfInfo.operandTypeChecker, 
false); - break; - case "avg": - calciteAggFn = new HiveSqlAverageAggFunction( - udfInfo.returnTypeInference, - udfInfo.operandTypeInference, - udfInfo.operandTypeChecker); + case "sum": + calciteAggFn = new HiveSqlSumAggFunction( + isDistinct, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + case "count": + calciteAggFn = new HiveSqlCountAggFunction( + isDistinct, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + case "min": + calciteAggFn = new HiveSqlMinMaxAggFunction( + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker, true); + break; + case "max": + calciteAggFn = new HiveSqlMinMaxAggFunction( + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker, false); + break; + case "avg": + calciteAggFn = new HiveSqlAverageAggFunction( + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + case "std": + case "stddev": + case "stddev_pop": + calciteAggFn = new HiveSqlVarianceAggFunction( + "stddev_pop", + SqlKind.STDDEV_POP, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + case "stddev_samp": + calciteAggFn = new HiveSqlVarianceAggFunction( + "stddev_samp", + SqlKind.STDDEV_SAMP, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + case "variance": + case "var_pop": + calciteAggFn = new HiveSqlVarianceAggFunction( + "var_pop", + SqlKind.VAR_POP, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + case "var_samp": + calciteAggFn = new HiveSqlVarianceAggFunction( + "var_samp", + SqlKind.VAR_SAMP, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); + break; + default: + calciteAggFn = new CalciteUDAF( + isDistinct, + udfInfo.udfName, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); break; - default: - calciteAggFn = new CalciteUDAF( - isDistinct, - udfInfo.udfName, - udfInfo.returnTypeInference, - udfInfo.operandTypeInference, - udfInfo.operandTypeChecker); - break; } - } return calciteAggFn; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 3520d90fa8..d90dde992b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -177,6 +177,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceFunctionsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule; @@ -1821,6 +1822,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE); rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE); rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE); + 
rules.add(HiveAggregateReduceFunctionsRule.INSTANCE); rules.add(HiveAggregateReduceRule.INSTANCE); if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) { rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses)); @@ -1839,7 +1841,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.toArray(new RelOptRule[rules.size()])); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); -// it is happening at 1762 + // 4. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java index 01c933c4a1..a0072f7e8e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java @@ -41,16 +41,30 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + parameters[0].getTypeName() + " is passed."); } switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case BYTE: + case SHORT: + case INT: case LONG: - return new SumZeroIfEmpty(); + return new SumLongZeroIfEmpty(); + case TIMESTAMP: + case FLOAT: + case DOUBLE: + case STRING: + case VARCHAR: + case CHAR: + return new SumDoubleZeroIfEmpty(); + case DECIMAL: + return new SumHiveDecimalZeroIfEmpty(); + case BOOLEAN: + case DATE: default: throw new UDFArgumentTypeException(0, - "Only bigint type arguments are accepted but " + "Only numeric or string type arguments are accepted but " + parameters[0].getTypeName() + " is passed."); } } - public static class SumZeroIfEmpty extends GenericUDAFSumLong { + public static class SumLongZeroIfEmpty extends GenericUDAFSumLong { @Override public Object terminate(AggregationBuffer agg) throws HiveException { @@ -59,5 +73,24 @@ public Object terminate(AggregationBuffer agg) throws HiveException { return result; } } -} + public static class SumDoubleZeroIfEmpty extends GenericUDAFSumDouble { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + SumDoubleAgg myagg = (SumDoubleAgg) agg; + result.set(myagg.sum); + return result; + } + } + + public static class SumHiveDecimalZeroIfEmpty extends GenericUDAFSumHiveDecimal { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg; + result.set(myagg.sum); + return result; + } + } +} diff --git a/ql/src/test/queries/clientpositive/groupby3.q b/ql/src/test/queries/clientpositive/groupby3.q index d709d9b5ad..284c2a8826 100755 --- a/ql/src/test/queries/clientpositive/groupby3.q +++ b/ql/src/test/queries/clientpositive/groupby3.q @@ -1,3 +1,4 @@ +set hive.cbo.enable=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr=false; diff --git a/ql/src/test/queries/clientpositive/groupby3_map_skew.q b/ql/src/test/queries/clientpositive/groupby3_map_skew.q index f9cb46ee32..8b18d11186 100644 --- a/ql/src/test/queries/clientpositive/groupby3_map_skew.q +++ b/ql/src/test/queries/clientpositive/groupby3_map_skew.q @@ -1,3 +1,4 @@ +set hive.cbo.enable=false; set hive.mapred.mode=nonstrict; set hive.map.aggr=true; 
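-- Note: with hive.cbo.enable=true, the new HiveAggregateReduceFunctionsRule
-- would rewrite the std/stddev_samp/variance/var_samp calls in this test
-- during planning, e.g.
--   variance(x)    => (sum(x*x) - sum(x)*sum(x)/count(x)) / count(x)
--   stddev_samp(x) => power((sum(x*x) - sum(x)*sum(x)/count(x))
--                     / case when count(x) = 1 then null else count(x) - 1 end, 0.5)
-- Pinning hive.cbo.enable=false keeps the test exercising the original
-- GenericUDAF implementations instead of the rewritten plan.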
set hive.groupby.skewindata=true; diff --git a/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q new file mode 100644 index 0000000000..ae4733f705 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q @@ -0,0 +1,60 @@ +set hive.cbo.enable=false; + +drop table t; +create table t (id int,px int,y decimal,x decimal); + +insert into t values (101,1,1,1); +insert into t values (201,2,1,1); +insert into t values (301,3,1,1); +insert into t values (401,4,1,11); +insert into t values (501,5,1,null); +insert into t values (601,6,null,1); +insert into t values (701,6,null,null); +insert into t values (102,1,2,2); +insert into t values (202,2,1,2); +insert into t values (302,3,2,1); +insert into t values (402,4,2,12); +insert into t values (502,5,2,null); +insert into t values (602,6,null,2); +insert into t values (702,6,null,null); +insert into t values (103,1,3,3); +insert into t values (203,2,1,3); +insert into t values (303,3,3,1); +insert into t values (403,4,3,13); +insert into t values (503,5,3,null); +insert into t values (603,6,null,3); +insert into t values (703,6,null,null); +insert into t values (104,1,4,4); +insert into t values (204,2,1,4); +insert into t values (304,3,4,1); +insert into t values (404,4,4,14); +insert into t values (504,5,4,null); +insert into t values (604,6,null,4); +insert into t values (704,6,null,null); +insert into t values (800,7,1,1); + + +explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x), +regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x) + from t group by px order by px; + +select px, + round( var_pop(x),5), + round( var_pop(y),5), + round( corr(y,x),5), + round( covar_samp(y,x),5), + round( covar_pop(y,x),5), + regr_count(y,x), + round( regr_slope(y,x),5), + round( regr_intercept(y,x),5), + round( regr_r2(y,x),5), + round( regr_sxx(y,x),5), + round( regr_syy(y,x),5), + round( regr_sxy(y,x),5), + round( regr_avgx(y,x),5), + round( regr_avgy(y,x),5), + round( regr_count(y,x),5) + from t group by px order by px; + + +select id,regr_count(y,x) over (partition by px) from t order by id; diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index bece89f0d5..58e6f4684e 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -49,26 +49,27 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: $f0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: $f0, $f00, $f2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: $f0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: $f2 (type: double), $f00 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), 
max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) mode: complete - outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double), $f9 (type: double), UDFToDouble($f10) (type: double) + expressions: $f0 (type: double), ($f0 / $f1) (type: double), ($f2 / $f3) (type: double), UDFToDouble($f4) (type: double), UDFToDouble($f5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / $f1), 0.5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END), 0.5) (type: double), (($f6 - (($f7 * $f7) / $f1)) / $f1) (type: double), (($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END) (type: double), $f2 (type: double), UDFToDouble($f3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -77,19 +78,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 5256 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 
(type: struct), _col10 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 5256 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 5256 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -146,10 +147,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -164,4 +165,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 79136.0 309.0 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out index d6ff5b75cf..ee22ba4592 100644 --- a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out +++ b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out @@ -280,29 +280,29 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(key), min(key), avg(key) + aggregations: max(key), min(key), sum(key), count(key) keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: struct) + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), avg(VALUE._col2) + aggregations: max(VALUE._col0), min(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) keys: 
KEY._col0 (type: string) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col1), count(_col0), min(_col2), avg(_col3) + aggregations: max(_col1), count(_col0), min(_col2), sum(_col3), count(_col4) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -316,21 +316,25 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), (_col3 / _col4) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -564,33 +568,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), stddev(key) - keys: key (type: string) + aggregations: count(), sum(_col2), sum(_col1), count(_col0) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: 
string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col3 (type: struct) + value expressions: _col1 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev(VALUE._col1) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) keys: KEY._col0 (type: string) mode: partial2 - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col1), count(_col0), stddev(_col2) + aggregations: count(_col1), count(_col0), sum(_col2), sum(_col3), count(_col4) mode: partial2 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -604,21 +608,21 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), stddev(VALUE._col2) + aggregations: count(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(_col2) (type: int) + expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -683,33 +687,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), 
max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) + aggregations: sum(_col0), count(_col0), max(_col0), min(_col0), sum(_col2), sum(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), min(VALUE._col3), std(VALUE._col4), stddev_samp(VALUE._col5), variance(VALUE._col6), var_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), max(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5) keys: KEY._col0 (type: string) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1), avg(_col2), count(_col0), max(_col3), min(_col4), std(_col5), stddev_samp(_col6), variance(_col7), var_samp(_col8) + aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -723,21 +727,21 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + 
Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(_col5) (type: int), UDFToInteger(_col6) (type: int), UDFToInteger(_col7) (type: int), UDFToInteger(_col8) (type: int) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5)) (type: int), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / _col1)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -796,26 +800,26 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(key), count(DISTINCT key), min(key), avg(key) + aggregations: max(key), count(DISTINCT key), min(key), sum(key), count(key) keys: value (type: string), key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: struct) + value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), avg(VALUE._col3) + aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: double) + expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), (_col4 / _col5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/decimal_precision.q.out b/ql/src/test/results/clientpositive/decimal_precision.q.out index 
3f28106685..2ac3190bc8 100644 --- a/ql/src/test/results/clientpositive/decimal_precision.q.out +++ b/ql/src/test/results/clientpositive/decimal_precision.q.out @@ -542,27 +542,31 @@ STAGE PLANS: outputColumnNames: dec Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(dec), sum(dec) + aggregations: sum(dec), count(dec) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -578,7 +582,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_precision #### A masked pattern was here #### -88499534.57586576220645 2743485571.8518386284 +88499534.575865762206451613 2743485571.8518386284 PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@decimal_precision diff --git a/ql/src/test/results/clientpositive/decimal_udf.q.out b/ql/src/test/results/clientpositive/decimal_udf.q.out index e451a186fc..e6df9fbda1 100644 --- a/ql/src/test/results/clientpositive/decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/decimal_udf.q.out @@ -1282,26 +1282,26 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(key), count(key), avg(key) + aggregations: sum(key), count(key) keys: value (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: 
int) Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) + value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10)) + expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), (CAST( _col1 AS decimal(24,14)) / _col2) (type: decimal(38,28)), _col1 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1319,10 +1319,10 @@ STAGE PLANS: key expressions: _col0 (type: int) sort order: + Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) + value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(38,28)), _col3 (type: decimal(30,10)) Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(38,28)), VALUE._col2 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1347,23 +1347,23 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_udf #### A masked pattern was here #### --1234567890 -1234567890.123456789000000000 -1234567890.12345678900000 -1234567890.1234567890 --1255 -1255.490000000000000000 -1255.49000000000000 -1255.4900000000 --11 -1.122000000000000000 -1.12200000000000 -1.1220000000 --1 -1.120000000000000000 -1.12000000000000 -2.2400000000 -0 0.025384615384615385 0.02538461538462 0.3300000000 -1 1.048400000000000000 1.04840000000000 5.2420000000 -2 2.000000000000000000 2.00000000000000 4.0000000000 -3 3.140000000000000000 3.14000000000000 9.4200000000 -4 3.140000000000000000 3.14000000000000 3.1400000000 -10 10.000000000000000000 10.00000000000000 10.0000000000 -20 20.000000000000000000 20.00000000000000 20.0000000000 -100 100.000000000000000000 100.00000000000000 100.0000000000 -124 124.000000000000000000 124.00000000000000 124.0000000000 -125 125.200000000000000000 125.20000000000000 125.2000000000 -200 200.000000000000000000 200.00000000000000 200.0000000000 -4400 -4400.000000000000000000 -4400.00000000000000 -4400.0000000000 -1234567890 1234567890.123456780000000000 1234567890.12345678000000 1234567890.1234567800 +-1234567890 -1234567890.123456789000000000 -1234567890.1234567890000000000000000000 -1234567890.1234567890 +-1255 -1255.490000000000000000 -1255.4900000000000000000000000000 -1255.4900000000 +-11 -1.122000000000000000 -1.1220000000000000000000000000 
-1.1220000000 +-1 -1.120000000000000000 -1.1200000000000000000000000000 -2.2400000000 +0 0.025384615384615385 0.0253846153846153846153846154 0.3300000000 +1 1.048400000000000000 1.0484000000000000000000000000 5.2420000000 +2 2.000000000000000000 2.0000000000000000000000000000 4.0000000000 +3 3.140000000000000000 3.1400000000000000000000000000 9.4200000000 +4 3.140000000000000000 3.1400000000000000000000000000 3.1400000000 +10 10.000000000000000000 10.0000000000000000000000000000 10.0000000000 +20 20.000000000000000000 20.0000000000000000000000000000 20.0000000000 +100 100.000000000000000000 100.0000000000000000000000000000 100.0000000000 +124 124.000000000000000000 124.0000000000000000000000000000 124.0000000000 +125 125.200000000000000000 125.2000000000000000000000000000 125.2000000000 +200 200.000000000000000000 200.0000000000000000000000000000 200.0000000000 +4400 -4400.000000000000000000 -4400.0000000000000000000000000000 -4400.0000000000 +1234567890 1234567890.123456780000000000 1234567890.1234567800000000000000000000 1234567890.1234567800 PREHOOK: query: EXPLAIN SELECT -key FROM DECIMAL_UDF PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT -key FROM DECIMAL_UDF @@ -1849,35 +1849,39 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: decimal(20,10)), value (type: int) - outputColumnNames: key, value + expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev(key), variance(key) - keys: value (type: int) + aggregations: sum(_col3), sum(_col2), count(_col1) + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: stddev(VALUE._col0), variance(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1897,8 +1901,8 @@ POSTHOOK: Input: default@decimal_udf -1255 0.0 0.0 -11 0.0 0.0 -1 0.0 0.0 -0 0.22561046704494161 0.050900082840236685 -1 0.05928102563215321 0.0035142400000000066 +0 0.22561046704494161 0.05090008284023669 +1 0.05928102563215448 0.003514240000000157 2 0.0 0.0 3 0.0 0.0 4 0.0 0.0 @@ -1926,35 +1930,39 @@ STAGE PLANS: alias: decimal_udf Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: decimal(20,10)), value (type: int) - outputColumnNames: key, value + expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(key), var_samp(key) - keys: value (type: int) + aggregations: sum(_col3), sum(_col2), count(_col1) + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1974,8 +1982,8 @@ POSTHOOK: Input: default@decimal_udf -1255 NULL NULL -11 NULL NULL -1 0.0 0.0 -0 0.2348228191855647 0.055141756410256405 -1 0.06627820154470102 0.004392800000000008 +0 0.23482281918556472 0.05514175641025642 +1 0.06627820154470243 0.0043928000000001965 2 0.0 0.0 3 0.0 0.0 4 NULL NULL diff --git a/ql/src/test/results/clientpositive/fetch_aggregation.q.out 
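All of the plan changes above follow the same two reductions applied by this patch: avg(x) is replaced by sum(x) / count(x), and the stddev/variance family is reduced to the primitive partials sum(x*x), sum(x) and count(x), recombined by the expressions visible in the new Select Operators. A minimal Java sketch of those identities (class and method names are illustrative only; Hive emits these as plan expressions, not Java):

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    public final class ReducedAggregates {

      // avg(x) = sum(x) / count(x); NULL on an empty group. Scale 18 mirrors the
      // decimal(38,18) division type in the decimal_precision plan above; the
      // rounding mode is chosen for illustration.
      static BigDecimal avg(BigDecimal sum, long count) {
        return count == 0 ? null
            : sum.divide(BigDecimal.valueOf(count), 18, RoundingMode.HALF_UP);
      }

      // var_pop(x) = (sum(x^2) - sum(x)^2 / n) / n
      static Double varPop(double sumSq, double sum, long n) {
        return n == 0 ? null : (sumSq - (sum * sum) / n) / n;
      }

      // var_samp(x) divides by (n - 1) and is NULL when n = 1, mirroring
      // CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END above.
      static Double varSamp(double sumSq, double sum, long n) {
        return n <= 1 ? null : (sumSq - (sum * sum) / n) / (n - 1);
      }

      // stddev_pop(x): the rewritten plans use power(var, 0.5) rather than sqrt.
      static Double stddevPop(double sumSq, double sum, long n) {
        Double v = varPop(sumSq, sum, n);
        return v == null ? null : Math.pow(v, 0.5);
      }
    }

This also accounts for the result-only hunks: the wider average 88499534.575865762206451613 simply carries the 18-digit scale of the decimal(38,18) division, and last-digit changes such as 0.050900082840236685 vs 0.05090008284023669 are double rounding drift from the reassociated sum and sum-of-squares arithmetic, not semantic differences.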
b/ql/src/test/results/clientpositive/fetch_aggregation.q.out index f20320fd9b..801c6de35d 100644 --- a/ql/src/test/results/clientpositive/fetch_aggregation.q.out +++ b/ql/src/test/results/clientpositive/fetch_aggregation.q.out @@ -16,14 +16,14 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(key), sum(key), avg(key), min(key), max(key), std(key), variance(key) + aggregations: count(_col0), sum(_col0), min(_col0), max(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -36,11 +36,15 @@ STAGE PLANS: limit: -1 Processor Tree: Group By Operator - aggregations: count(_col0), sum(_col1), avg(_col2), min(_col3), max(_col4), std(_col5), variance(_col6) + aggregations: count(_col0), sum(_col1), min(_col2), max(_col3), sum(_col4), sum(_col5) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE - ListSink + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: double), (_col1 / _col0) (type: double), _col2 (type: string), _col3 (type: string), power(((_col4 - ((_col5 * _col5) / _col0)) / _col0), 0.5) (type: double), ((_col4 - ((_col5 * _col5) / _col0)) / _col0) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + ListSink PREHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key),variance(key) from src PREHOOK: type: QUERY @@ -50,4 +54,4 @@ POSTHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key), POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -500 130091.0 260.182 0 98 142.92680950752384 20428.072876000006 +500 130091.0 260.182 0 98 142.9268095075238 20428.072876000002 diff --git a/ql/src/test/results/clientpositive/groupby3.q.out b/ql/src/test/results/clientpositive/groupby3.q.out index 7c97174830..0a566c7570 100644 --- a/ql/src/test/results/clientpositive/groupby3.q.out +++ b/ql/src/test/results/clientpositive/groupby3.q.out @@ -47,13 +47,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: 
substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out index edad22b93f..06c476b145 100644 --- a/ql/src/test/results/clientpositive/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map.q.out @@ -45,33 +45,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) + aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / 
_col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -80,19 +80,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -146,7 +146,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index 20344640e6..f94ef49c27 100644 --- 
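The groupby3_map golden numbers make the rewrite easy to verify by hand: with the 500 rows of src, 130091.0 / 500 = 260.182 reproduces the avg column, and power(20428.072876000002, 0.5) reproduces the std column. A self-contained check (all values copied from the output above; none of this is Hive code):

    public final class Groupby3Check {
      public static void main(String[] args) {
        double sum = 130091.0;            // sum(substr(value, 5))
        long count = 500;                 // rows in src
        System.out.println(sum / count);  // 260.182 -> the avg column
        double varPop = 20428.072876000002;         // var_pop column
        System.out.println(Math.pow(varPop, 0.5));  // ~142.9268095075238 -> std
      }
    }

The changed lineage for dest1.c3, which now also lists (src)src.null, is consistent with avg(DISTINCT _col0) having been split into the pair sum(DISTINCT _col0) / count(DISTINCT _col0) in the plan above.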
a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -49,33 +49,33 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0) + aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), 
_col2 (type: double), UDFToDouble(_col3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -84,19 +84,19 @@ STAGE PLANS: Select Operator expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4832 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4832 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4832 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -153,10 +153,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: 
Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -171,4 +171,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876 20469.01089779559 79136.0 309.0 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out index e53e62c2ce..1ef5a45e88 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -46,12 +46,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) - keys: _col0 (type: string) + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) + keys: substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out index 1aa4cb6ce6..83e107054d 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -45,26 +45,27 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0) + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -73,7 +74,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll') mode: complete @@ -135,7 +136,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -150,4 +151,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 +130091.0 260.182 
256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 diff --git a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index bb964e66d6..6acc9a9016 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -49,26 +49,27 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col1 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -77,7 +78,7 @@ STAGE PLANS: Select Operator expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll') mode: complete @@ -142,10 +143,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -160,4 +161,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 79136.0 309.0 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out b/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out index e894205a27..5d27f4ca2c 100644 --- a/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out +++ b/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out @@ -45,27 +45,27 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c), count() + aggregations: sum(c), count(c), count() keys: a (type: string), b (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 4 Data 
size: 2880 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -105,27 +105,27 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c), count() + aggregations: sum(c), count(c), count() keys: a (type: string), b (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -191,23 +191,23 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c), count() + aggregations: sum(c), count(c), count() keys: a (type: string), b (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 720 Basic stats: 
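In groupby_grouping_sets3 the same avg(c) reduction has to survive a two-stage plan (the hash-mode group-by feeds a partials stage and then a final stage), which works because the replacement partials are algebraic: (sum, count) pairs merge by plain addition across stages, whereas finished averages would not. A small illustration (Java 16+ records; names are mine, not Hive's):

    public final class PartialAvg {
      record Partial(double sum, long count) {
        Partial merge(Partial other) {           // associative partial merge
          return new Partial(sum + other.sum, count + other.count);
        }
        double finish() { return sum / count; }  // final division, as in the Select
      }

      public static void main(String[] args) {
        Partial a = new Partial(10.0, 2);        // e.g. values 4 and 6
        Partial b = new Partial(8.0, 1);         // e.g. value 8
        System.out.println(a.merge(b).finish()); // 6.0, the true average
        System.out.println((a.finish() + b.finish()) / 2); // 6.5: averaging averages is wrong
      }
    }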
COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: bigint) + value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: string), KEY._col1 (type: string), 0L (type: bigint) mode: partials - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -225,17 +225,17 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: final - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out index 12fae67586..281b81d2ff 100644 --- a/ql/src/test/results/clientpositive/having2.q.out +++ b/ql/src/test/results/clientpositive/having2.q.out @@ -311,10 +311,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: sum(_col2), avg(_col0), count(_col4) + aggregations: sum(_col2), sum(_col0), count(_col0), count(_col4) keys: _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -332,16 +332,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE Filter Operator - 
predicate: ((_col1 <= 4074689.000000041D) and (_col2 <= 822.0D) and (_col3 > 4L)) (type: boolean) + predicate: (((_col2 / _col3) <= 822.0D) and (_col1 <= 4074689.000000041D) and (_col4 > 4L)) (type: boolean) Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -430,10 +430,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: sum(_col2), avg(_col0), count(_col4) + aggregations: sum(_col2), sum(_col0), count(_col0), count(_col4) keys: _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -451,20 +451,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4 + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((_col2 <= 4074689.000000041D) and (_col3 <= 822.0D) and (_col4 > 4L)) (type: boolean) + predicate: (((_col3 / _col4) <= 822.0D) and (_col2 <= 4074689.000000041D) and (_col5 > 4L)) (type: boolean) Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -553,10 +553,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: sum(_col2), avg(_col0), count(_col4) + aggregations: sum(_col2), sum(_col0), count(_col0), count(_col4) keys: _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false @@ -574,20 +574,20 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2) + 
aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint) - outputColumnNames: _col1, _col2, _col3, _col4 + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) + outputColumnNames: _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: ((_col2 <= 4074689.000000041D) and (_col3 <= 822.0D) and (_col4 > 4L)) (type: boolean) + predicate: (((_col3 / _col4) <= 822.0D) and (_col2 <= 4074689.000000041D) and (_col5 > 4L)) (type: boolean) Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col1 (type: string), _col1 (type: string) diff --git a/ql/src/test/results/clientpositive/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/limit_pushdown2.q.out index 5aeb5213e5..bae6e248e7 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown2.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown2.q.out @@ -24,10 +24,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -35,24 +35,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -118,10 +122,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -129,24 +133,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -212,10 +220,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -223,24 +231,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) 
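limit_pushdown2 shows how the reduction interacts with Limit: the new Select that evaluates (_col2 / _col3) is a one-to-one projection, so placing the Limit above it (with TopN Hash Memory Usage still in force on the shuffle) yields exactly the same 20 rows as limiting first. A sketch of that commutation (Java 16+, streams used for illustration only):

    import java.util.List;

    public final class LimitCommute {
      record Partial(double sum, long count) {}

      public static void main(String[] args) {
        List<Partial> groups =
            List.of(new Partial(10, 2), new Partial(9, 3), new Partial(4, 1));
        // project the average, then limit ...
        var a = groups.stream().map(p -> p.sum() / p.count()).limit(2).toList();
        // ... or limit, then project: identical for a 1:1 projection
        var b = groups.stream().limit(2).map(p -> p.sum() / p.count()).toList();
        System.out.println(a.equals(b));  // true: both are [5.0, 3.0]
      }
    }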
keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -306,10 +318,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: string), _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -317,24 +329,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -400,10 +416,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: string), _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -411,24 +427,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -494,10 +514,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: string), _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -505,24 +525,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: 
mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -821,30 +845,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -936,27 +964,27 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1500 Data size: 15936 
Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct) + value expressions: _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double) + expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1025,27 +1053,27 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct) + value expressions: _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double) + expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out index 347ae3780d..df84bbfa39 100644 --- a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out +++ b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out @@ -289,52 +289,56 @@ STAGE PLANS: outputColumnNames: key Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(key), min(key), avg(key) + aggregations: max(key), min(key), sum(key), count(key) keys: key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col3, _col4 - 
Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: struct) + Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: double), _col5 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), avg(VALUE._col2) + aggregations: max(VALUE._col0), min(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) keys: KEY._col0 (type: string) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(_col1), count(_col0), min(_col2), avg(_col3) + aggregations: max(_col1), count(_col0), min(_col2), sum(_col3), count(_col4) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), (_col3 / _col4) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -576,51 +580,51 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: string) - outputColumnNames: key + expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(), stddev(key) - keys: key (type: string) + aggregations: count(), sum(_col2), sum(_col1), count(_col0) + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 29750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col3 (type: struct) + Statistics: Num rows: 250 Data size: 29750 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev(VALUE._col1) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) keys: KEY._col0 (type: string) mode: partial2 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 29750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col1), count(_col0), stddev(_col2) + aggregations: count(_col1), count(_col0), sum(_col2), sum(_col3), count(_col4) mode: partial2 - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), stddev(VALUE._col2) + aggregations: count(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - 
expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(_col2) (type: int) + expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -696,51 +700,51 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col0), avg(_col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) + aggregations: sum(_col0), count(_col0), max(_col0), min(_col0), sum(_col2), sum(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7 + Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), min(VALUE._col3), std(VALUE._col4), stddev_samp(VALUE._col5), variance(VALUE._col6), var_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), max(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5) keys: KEY._col0 (type: string) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col1), avg(_col2), count(_col0), max(_col3), min(_col4), std(_col5), stddev_samp(_col6), variance(_col7), var_samp(_col8) + aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6) mode: partial2 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: 
COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: double) Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(_col5) (type: int), UDFToInteger(_col6) (type: int), UDFToInteger(_col7) (type: int), UDFToInteger(_col8) (type: int) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5)) (type: int), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / _col1)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -810,30 +814,30 @@ STAGE PLANS: outputColumnNames: key, value Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(key), count(DISTINCT key), min(key), avg(key) + aggregations: max(key), count(DISTINCT key), min(key), sum(key), count(key) keys: value (type: string), key (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 202500 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 202500 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: struct) + Statistics: Num rows: 250 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: string), _col4 (type: string), _col5 
(type: double), _col6 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), avg(VALUE._col3) + aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 118750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 250 Data size: 120750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: double) + expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), (_col4 / _col5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out index 98743eb9db..ce53955692 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out @@ -2144,42 +2144,42 @@ Stage-0 limit:-1 Stage-1 Reducer 3 llap - File Output Operator [FS_21] - Merge Join Operator [MERGEJOIN_26] (rows=6 width=227) - Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"] + File Output Operator [FS_22] + Merge Join Operator [MERGEJOIN_27] (rows=6 width=227) + Conds:RS_18._col1=RS_19._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] llap - SHUFFLE [RS_17] + SHUFFLE [RS_18] PartitionCols:_col1 Select Operator [SEL_6] (rows=13 width=227) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=227) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1 + Group By Operator [GBY_5] (rows=13 width=235) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Map 1 [SIMPLE_EDGE] llap SHUFFLE [RS_4] PartitionCols:_col0, _col1 - Group By Operator [GBY_3] (rows=13 width=295) - Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_24] (rows=26 width=223) + Group By Operator [GBY_3] (rows=13 width=235) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(p_size)","count(p_size)"],keys:p_name, p_mfgr + Filter Operator [FIL_25] (rows=26 width=223) predicate:p_name is not null TableScan [TS_0] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 5 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] + SHUFFLE [RS_19] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=13 width=184) + Group By Operator [GBY_17] (rows=13 width=184) Output:["_col0"],keys:_col0 - Select Operator [SEL_11] (rows=26 width=184) + Select Operator [SEL_12] (rows=26 width=184) Output:["_col0"] - Filter Operator [FIL_25] (rows=26 width=491) + Filter Operator [FIL_26] (rows=26 width=491) predicate:first_value_window_0 is not null - PTF Operator [PTF_10] (rows=26 width=491) + PTF Operator [PTF_11] (rows=26 width=491) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - 
Select Operator [SEL_9] (rows=26 width=491) + Select Operator [SEL_10] (rows=26 width=491) Output:["_col1","_col2","_col5"] <-Map 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_8] + SHUFFLE [RS_9] PartitionCols:p_mfgr - TableScan [TS_7] (rows=26 width=223) + TableScan [TS_8] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * @@ -2386,12 +2386,12 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_20] (rows=1 width=12) Output:["_col0","_col1"] - Group By Operator [GBY_7] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] + Group By Operator [GBY_7] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Map 5 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_6] - Group By Operator [GBY_5] (rows=1 width=76) - Output:["_col0"],aggregations:["avg(p_size)"] + Group By Operator [GBY_5] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"] Filter Operator [FIL_33] (rows=8 width=4) predicate:(p_size < 10) TableScan [TS_2] (rows=26 width=4) @@ -2405,7 +2405,9 @@ Stage-0 SHUFFLE [RS_22] Group By Operator [GBY_12] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count()","count(_col0)"] - Please refer to the previous Group By Operator [GBY_7] + Select Operator [SEL_8] (rows=1 width=16) + Output:["_col0"] + Please refer to the previous Group By Operator [GBY_7] <-Map 1 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_21] Select Operator [SEL_1] (rows=26 width=125) diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out index d050c4ec69..05b5bfddc7 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -54,13 +54,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out index fe6b4f96e0..d1ac4e9b83 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out @@ -296,39 +296,43 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: 
string) - Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out index 97686cb5d1..960414ba37 100644 --- a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out +++ b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out @@ -311,35 +311,39 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Reduce 
Output Operator - key expressions: _col0 (type: string) - sort order: + + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: double) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.3 + value expressions: _col1 (type: double) Reducer 3 Execution mode: llap Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out index cce6bc3c0a..09a120ae12 100644 --- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out +++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out @@ -299,40 +299,44 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Offset of rows: 10 - Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Offset of rows: 10 Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out index 1ccdff8aa2..c786ba31c7 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out @@ -191,34 +191,34 @@ STAGE PLANS: alias: parquet_types Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double) + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round((_col4 / _col5), 5) (type: double), round(power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5), 5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 11 Data size: 
2288 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 277036a8e5..37b18f01e6 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -360,30 +360,34 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 5 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) + predicate: (_col0 is not null and _col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1268,17 +1272,17 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(l_quantity) + aggregations: sum(l_quantity), count(l_quantity) keys: l_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1337,16 +1341,16 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: 
avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (_col1 is not null and _col2 is not null) (type: boolean) + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: double), _col0 (type: int) + expressions: (_col1 / _col2) (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -4463,17 +4467,17 @@ STAGE PLANS: predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4498,24 +4502,32 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: _col1 is not null (type: boolean) - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash + predicate: (_col1 is not null and _col2 is not null) (type: boolean) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col1 / _col2) (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) - Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE 
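The stddev/variance hunks earlier in this diff (count_dist_rewrite.q.out, parquet_types_vectorization.q.out) reduce stddev_pop, stddev_samp, variance and var_samp to sum(x*x), sum(x) and count(x) partials, with a final expression of the form power(((sum(x*x) - (sum(x)*sum(x)) / n) / n), 0.5) and a CASE WHEN (n = 1) THEN null guard for the sample variants. A small numeric check of that identity, var_pop = E[x^2] - E[x]^2 — names invented for this sketch, and column indices (_col5, _col6, _col1 in count_dist_rewrite.q.out) vary per plan:

public class VarianceDecompositionSketch {
  public static void main(String[] args) {
    double[] x = {2, 4, 4, 4, 5, 5, 7, 9};

    // Single-pass partials, mergeable across stages like sum/count above.
    double sum = 0;    // sum(x),   e.g. _col6 in count_dist_rewrite.q.out
    double sumSq = 0;  // sum(x*x), e.g. _col5 in count_dist_rewrite.q.out
    long n = 0;        // count(x)
    for (double v : x) {
      sum += v;
      sumSq += v * v;
      n++;
    }

    double varPop = (sumSq - (sum * sum) / n) / n;
    Double varSamp = (n == 1) ? null : (sumSq - (sum * sum) / n) / (n - 1);

    System.out.println(Math.pow(varPop, 0.5)); // stddev_pop = 2.0 for this data
    System.out.println(varSamp);               // var_samp = 32/7 ~ 4.5714
  }
}
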
+ Group By Operator + keys: _col0 (type: int), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double) + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -5051,23 +5063,23 @@ STAGE PLANS: predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -5160,34 +5172,38 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count(), count(_col1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), 
count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: double), _col0 (type: int) + expressions: (_col1 / _col2) (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out index 390caf0a01..79857ab365 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -279,17 +279,17 @@ STAGE PLANS: predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_mfgr (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Group By Operator aggregations: max(p_size), min(p_size) keys: p_mfgr (type: string) @@ -308,17 +308,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -410,17 +414,17 @@ STAGE PLANS: predicate: p_mfgr is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_mfgr (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: 
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index 390caf0a01..79857ab365 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -279,17 +279,17 @@ STAGE PLANS:
                 predicate: p_mfgr is not null (type: boolean)
                 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: avg(p_size)
+                  aggregations: sum(p_size), count(p_size)
                   keys: p_mfgr (type: string)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: struct)
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
                 Group By Operator
                   aggregations: max(p_size), min(p_size)
                   keys: p_mfgr (type: string)
@@ -308,17 +308,21 @@
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                keys: KEY._col0 (type: string)
                mode: mergepartial
-               outputColumnNames: _col0, _col1
+               outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-               Reduce Output Operator
-                 key expressions: _col0 (type: string)
-                 sort order: +
-                 Map-reduce partition columns: _col0 (type: string)
+               Select Operator
+                 expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+                 outputColumnNames: _col0, _col1
                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                 value expressions: _col1 (type: double)
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                   value expressions: _col1 (type: double)
        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
@@ -410,17 +414,17 @@ STAGE PLANS:
                 predicate: p_mfgr is not null (type: boolean)
                 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: avg(p_size)
+                  aggregations: sum(p_size), count(p_size)
                   keys: p_mfgr (type: string)
                   mode: hash
-                  outputColumnNames: _col0, _col1
+                  outputColumnNames: _col0, _col1, _col2
                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col1 (type: struct)
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
                 Group By Operator
                   aggregations: max(p_size), min(p_size)
                   keys: p_mfgr (type: string)
@@ -439,17 +443,21 @@
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                keys: KEY._col0 (type: string)
                mode: mergepartial
-               outputColumnNames: _col0, _col1
+               outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-               Reduce Output Operator
-                 key expressions: _col0 (type: string)
-                 sort order: +
-                 Map-reduce partition columns: _col0 (type: string)
+               Select Operator
+                 expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+                 outputColumnNames: _col0, _col1
                  Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
-                 value expressions: _col1 (type: double)
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+                   value expressions: _col1 (type: double)
        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
@@ -1432,17 +1440,17 @@ STAGE PLANS:
                 predicate: p_name is not null (type: boolean)
                 Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: avg(p_size)
+                  aggregations: sum(p_size), count(p_size)
                   keys: p_name (type: string), p_mfgr (type: string)
                   mode: hash
-                  outputColumnNames: _col0, _col1, _col2
+                  outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: string)
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                     Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col2 (type: struct)
+                    value expressions: _col2 (type: bigint), _col3 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
        Map 3
@@ -1462,13 +1470,13 @@
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2
+               outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                Select Operator
-                 expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double)
+                 expressions: _col1 (type: string), _col0 (type: string), (_col2 / _col3) (type: double)
                  outputColumnNames: _col0, _col1, _col2
                  Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
                  Map Join Operator
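The scalar-subquery plans that follow lean on the same reduction, and the rewrite has to preserve avg's NULL behaviour: over an empty or all-NULL group, sum() is NULL and the quotient stays NULL, which is why the count()/count(_col0) guard aggregates around the subquery are kept, now nested under the new Select that computes the quotient. A hedged sketch of that edge case (plain Java standing in for the SQL semantics, not Hive code):

    public class EmptyGroupParity {
      // SQL-style sum(): NULL (here: null) over an empty group.
      static Double sqlSum(double[] xs) {
        if (xs.length == 0) return null;
        double s = 0;
        for (double x : xs) s += x;
        return s;
      }
      static Double avgViaSumCount(double[] xs) {
        Double s = sqlSum(xs);
        long n = xs.length;               // count(col) over the non-null inputs
        return s == null ? null : s / n;  // NULL propagates, matching avg()
      }
      public static void main(String[] args) {
        System.out.println(avgViaSumCount(new double[] {}));        // null, like avg() over no rows
        System.out.println(avgViaSumCount(new double[] {1, 2, 6})); // 3.0
      }
    }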
diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
index 438e44470a..24f18e9672 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
@@ -2888,35 +2888,39 @@ STAGE PLANS:
                 outputColumnNames: l_quantity
                 Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: avg(l_quantity)
+                  aggregations: sum(l_quantity), count(l_quantity)
                   mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     sort order:
-                    Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: struct)
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: double), _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 10
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                mode: mergepartial
-               outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-               Group By Operator
-                 aggregations: count(), count(_col0)
-                 mode: complete
-                 outputColumnNames: _col0, _col1
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: (_col0 / _col1) (type: double)
+                 outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-                 Reduce Output Operator
-                   sort order:
+                 Group By Operator
+                   aggregations: count(), count(_col0)
+                   mode: complete
+                   outputColumnNames: _col0, _col1
                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-                   value expressions: _col0 (type: bigint), _col1 (type: bigint)
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col0 (type: bigint), _col1 (type: bigint)
                Select Operator
-                 expressions: _col0 (type: double), true (type: boolean)
+                 expressions: (_col0 / _col1) (type: double), true (type: boolean)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 851a783fd6..7b8e87aa22 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -715,14 +715,14 @@ STAGE PLANS:
                 outputColumnNames: _col0
                 Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: avg(_col0)
+                  aggregations: sum(_col0), count(_col0)
                   mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     sort order:
-                    Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: struct)
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint), _col1 (type: bigint)
                 PTF Operator
                   Function definitions:
                     Input definition
@@ -752,41 +752,45 @@ STAGE PLANS:
                 outputColumnNames: _col0
                 Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: avg(_col0)
+                  aggregations: sum(_col0), count(_col0)
                   mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     sort order:
-                    Statistics:
Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(), count(_col0) - mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + Group By Operator + aggregations: count(), count(_col0) + mode: complete + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), true (type: boolean) + expressions: (_col0 / _col1) (type: double), true (type: boolean) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 1483651f62..4a8c9b1574 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -122,14 +122,14 @@ STAGE PLANS: outputColumnNames: p_size Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -143,14 +143,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 residual filter predicates: {(UDFToDouble(_col5) > _col9)} - Statistics: Num rows: 8 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5120 
Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 5600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -159,14 +159,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -826,14 +830,14 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -892,14 +896,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -1595,17 +1603,17 
@@ STAGE PLANS: predicate: p_type is not null (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_type (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1635,13 +1643,13 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: double), true (type: boolean), _col0 (type: string) + expressions: (_col1 / _col2) (type: double), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -4177,17 +4185,17 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(l_quantity) + aggregations: sum(l_quantity), count(l_quantity) keys: l_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -4236,13 +4244,13 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: double), _col0 (type: int) + expressions: (_col1 / _col2) (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -6328,29 +6336,29 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) keys: _col2 (type: int) mode: hash - 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (_col1 = 0.0D) (type: boolean) - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (0.0D = (_col1 / _col2)) (type: boolean) + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out index abbfffd9be..8ad070508e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out @@ -3815,14 +3815,14 @@ STAGE PLANS: predicate: p_partkey BETWEEN 1 AND 20 (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(p_partkey) + aggregations: sum(p_partkey), count(p_partkey) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs Map 9 @@ -3928,14 +3928,18 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -4175,14 +4179,14 @@ STAGE PLANS: 
outputColumnNames: p_size
                 Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: avg(p_size)
+                  aggregations: sum(p_size), count(p_size)
                   mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     sort order:
-                    Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: struct)
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint), _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
        Map 6
@@ -4254,14 +4258,18 @@ STAGE PLANS:
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                mode: mergepartial
-               outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-               Reduce Output Operator
-                 sort order:
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: (_col0 / _col1) (type: double)
+                 outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                 value expressions: _col0 (type: double)
+                 Reduce Output Operator
+                   sort order:
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                   value expressions: _col0 (type: double)
        Reducer 7
            Execution mode: llap
            Reduce Operator Tree:
@@ -4386,14 +4394,14 @@ STAGE PLANS:
                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col0 (type: int)
                 Group By Operator
-                  aggregations: avg(p_size)
+                  aggregations: sum(p_size), count(p_size)
                   mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     sort order:
-                    Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col0 (type: struct)
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col0 (type: bigint), _col1 (type: bigint)
                 Group By Operator
                   aggregations: sum(p_size)
                   mode: hash
@@ -4512,14 +4520,18 @@ STAGE PLANS:
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                mode: mergepartial
-               outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-               Reduce Output Operator
-                 sort order:
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: (_col0 / _col1) (type: double)
+                 outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                 value expressions: _col0 (type: double)
+                 Reduce Output Operator
+                   sort order:
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                   value expressions: _col0 (type: double)

  Stage: Stage-0
    Fetch Operator
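vector_aggregate_9.q.out below shows the vectorized side of the same change: VectorUDAFAvgDecimal/VectorUDAFAvgFinal disappear in favour of plain sum and count aggregators plus a native VectorSelectOperator division (DecimalColDivideDecimalColumn after a CastLongToDecimal on the count). A BigDecimal sketch of the arithmetic that Select performs, using a sum/count pair taken from the expected rows later in this patch (scale handling is simplified here; Hive derives it from the operand types):

    import java.math.BigDecimal;
    import java.math.RoundingMode;

    public class DecimalAvgDivision {
      public static void main(String[] args) {
        BigDecimal sum = new BigDecimal("5817556.0411483778"); // sum(cdecimal1) for one group
        BigDecimal cnt = BigDecimal.valueOf(6);                // count, cast to decimal as CastLongToDecimal does
        BigDecimal avg = sum.divide(cnt, 28, RoundingMode.HALF_UP);
        System.out.println(avg); // 969592.6735247296333..., as in the updated golden rows
      }
    }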
diff --git a/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out b/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
index 5d4bfe7fa4..1e090f0c1c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
@@ -140,9 +140,9 @@ STAGE PLANS:
                     projectedOutputColumnNums: [6]
                 Statistics: Num rows: 2000 Data size: 212912 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: min(dc), max(dc), sum(dc), avg(dc)
+                  aggregations: min(dc), max(dc), sum(dc), count(dc)
                   Group By Vectorization:
-                     aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimal(col 6:decimal(38,18)) -> struct
+                     aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 6:decimal(38,18)) -> bigint
                      className: VectorGroupByOperator
                      groupByMode: HASH
                      native: false
@@ -150,7 +150,7 @@ STAGE PLANS:
                      projectedOutputColumnNums: [0, 1, 2, 3]
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     sort order:
                     Reduce Sink Vectorization:
@@ -159,8 +159,8 @@ STAGE PLANS:
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        valueColumnNums: [0, 1, 2, 3]
-                    Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct)
+                    Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -190,14 +190,14 @@ STAGE PLANS:
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 4
-                   dataColumns: VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,18), VALUE._col2:decimal(38,18), VALUE._col3:struct
+                   dataColumns: VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,18), VALUE._col2:decimal(38,18), VALUE._col3:bigint
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
-               aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+               aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
                Group By Vectorization:
-                   aggregators: VectorUDAFMinDecimal(col 0:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 1:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 2:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(38,18)
+                   aggregators: VectorUDAFMinDecimal(col 0:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 1:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 2:decimal(38,18)) -> decimal(38,18), VectorUDAFCountMerge(col 3:bigint) -> bigint
                    className: VectorGroupByOperator
                    groupByMode: MERGEPARTIAL
                    native: false
@@ -205,17 +205,26 @@ STAGE PLANS:
                    projectedOutputColumnNums: [0,
1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), (_col2 / _col3) (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(38,18), col 4:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,18) + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -271,9 +280,9 @@ STAGE PLANS: projectedOutputColumnNums: [5] Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(d), max(d), sum(d), avg(d) + aggregations: min(d), max(d), sum(d), count(d) Group By Vectorization: - aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -281,7 +290,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -290,8 +299,8 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ 
-321,14 +330,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 4 - dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:double, VALUE._col3:struct + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:double, VALUE._col3:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) Group By Vectorization: - aggregators: VectorUDAFMinDouble(col 0:double) -> double, VectorUDAFMaxDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFAvgFinal(col 3:struct) -> double + aggregators: VectorUDAFMinDouble(col 0:double) -> double, VectorUDAFMaxDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -336,17 +345,26 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4] + selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 4:double + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -402,9 +420,9 @@ STAGE PLANS: projectedOutputColumnNums: [10] Statistics: Num rows: 2000 Data size: 76040 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ts), max(ts), sum(ts), avg(ts) + aggregations: min(ts), max(ts), sum(ts), count(ts) Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFAvgTimestamp(col 10:timestamp) -> struct + aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFCount(col 10:timestamp) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -412,7 +430,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash 
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -421,8 +439,8 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -452,14 +470,14 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 4 - dataColumns: VALUE._col0:timestamp, VALUE._col1:timestamp, VALUE._col2:double, VALUE._col3:struct + dataColumns: VALUE._col0:timestamp, VALUE._col1:timestamp, VALUE._col2:double, VALUE._col3:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFAvgFinal(col 3:struct) -> double + aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false @@ -467,17 +485,26 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 4] + selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 4:double + Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
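vector_cast_constant.q.out next: avg over literals reduces the same way, with one twist visible in the hunk below, avg(50) over the int literal becomes sum(50)/count() (a count() star, since the literal cannot be NULL), while the double and decimal variants keep count(50.0D) and count(50); the decimal quotient is then cast back to decimal(6,4). The long/long case is trivially (50 * n) / n (plain-Java sketch, n hypothetical):

    public class ConstantAvg {
      public static void main(String[] args) {
        long n = 1049;                        // hypothetical group size (the scanned table has 1049 rows)
        long sumInt = 50L * n;                // sum(50) partial, a bigint
        double avgInt = (double) sumInt / n;  // LongColDivideLongColumn yields double
        System.out.println(avgInt);           // 50.0 regardless of n
      }
    }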
diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index 4cb6213b89..861ae9ab14 100644
--- a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -142,19 +142,19 @@ STAGE PLANS:
                     projectedOutputColumnNums: [2]
                 Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
-                  aggregations: avg(50), avg(50.0D), avg(50)
+                  aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
                   Group By Vectorization:
-                     aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 12:int) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 14:decimal(10,0)) -> struct
+                     aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
                      className: VectorGroupByOperator
                      groupByMode: HASH
                      keyExpressions: col 2:int
                      native: false
                      vectorProcessingMode: HASH
-                     projectedOutputColumnNums: [0, 1, 2]
+                     projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                   keys: _col0 (type: int)
                   mode: hash
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 256 Data size: 114688 Basic stats: COMPLETE Column stats: COMPLETE
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 256 Data size: 39936 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: int)
                     sort order: +
@@ -163,9 +163,9 @@ STAGE PLANS:
                        className: VectorReduceSinkLongOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 256 Data size: 114688 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 256 Data size: 39936 Basic stats: COMPLETE Column stats: COMPLETE
                     TopN Hash Memory Usage: 0.1
-                    value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                    value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -187,29 +187,38 @@ STAGE PLANS:
                vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
+               aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5)
                Group By Vectorization:
-                   aggregators: VectorUDAFAvgFinal(col 1:struct) -> double, VectorUDAFAvgFinal(col 2:struct) -> double, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,4)
+                   aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(12,0)) -> decimal(12,0), VectorUDAFCountMerge(col 6:bigint) -> bigint
                    className: VectorGroupByOperator
                    groupByMode: MERGEPARTIAL
                    keyExpressions: col 0:int
                    native: false
                    vectorProcessingMode: MERGE_PARTIAL
-                   projectedOutputColumnNums: [0, 1, 2]
+                   projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
                keys: KEY._col0 (type: int)
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
-               Reduce Output Operator
-                 key expressions: _col0 (type: int)
-                 sort order: +
-                 Reduce Sink Vectorization:
-                     className: VectorReduceSinkObjectHashOperator
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+               Statistics: Num rows: 256 Data size: 39936 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col0 (type: int), (_col1 / _col2) (type: double), (_col3 / _col4) (type: double), CAST( (_col5 / _col6) AS decimal(6,4)) (type: decimal(6,4))
+                 outputColumnNames: _col0, _col1, _col2, _col3
+                 Select Vectorization:
+                     className: VectorSelectOperator
                      native: true
-                     nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                     projectedOutputColumnNums: [0, 7, 8, 11]
+                     selectExpressions: LongColDivideLongColumn(col 1:bigint, col 2:bigint) -> 7:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 8:double, CastDecimalToDecimal(col 10:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 5:decimal(12,0), col 9:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 9:decimal(19,0)) -> 10:decimal(32,20)) -> 11:decimal(6,4)
                  Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
-                 TopN Hash Memory Usage: 0.1
-                 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+                 Reduce Output Operator
+                   key expressions: _col0 (type: int)
+                   sort order: +
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkObjectHashOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
+                   TopN Hash Memory Usage: 0.1
+                   value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(6,4))
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -220,7 +229,7 @@ STAGE PLANS:
                vectorized: true
            Reduce Operator Tree:
              Select Operator
-               expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
+               expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(6,4))
                outputColumnNames: _col0, _col1, _col2, _col3
                Select Vectorization:
                    className: VectorSelectOperator
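vector_decimal_aggregate.q.out below is the richest case: stddev_pop and stddev_samp are reduced as well, so the map side now projects x and x*x as doubles, aggregates sum(x), sum(x*x) and count(x), and the reducer computes power((sum_sq - sum^2/n) / n, 0.5), with a CASE WHEN n = 1 THEN null ELSE n - 1 END denominator for the sample variant. That is what lets the whole reducer vectorize (the old struct-based stddev finalizer was the notVectorizedReason), and because the doubles now combine in a different order, the expected stddev values shift in their last few digits while the avg columns pick up the full scale of the decimal division. A compact sketch of the reduced formulas (illustrative, not Hive code; sample values hypothetical):

    public class VarianceReduction {
      public static void main(String[] args) {
        double[] xs = {2, 4, 4, 4, 5, 5, 7, 9};  // hypothetical column values for one group
        double s = 0, ss = 0;
        long n = 0;
        for (double x : xs) { s += x; ss += x * x; n++; }
        double stddevPop = Math.pow((ss - s * s / n) / n, 0.5);
        Double stddevSamp = n == 1 ? null                 // CASE WHEN (n = 1) THEN null
            : Math.pow((ss - s * s / n) / (n - 1), 0.5);  // ELSE (n - 1) END
        System.out.println(stddevPop);   // 2.0
        System.out.println(stddevSamp);  // ~2.138, sqrt(32/7)
      }
    }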
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
index 4bb8a01059..902d137b9c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
@@ -252,26 +252,27 @@ STAGE PLANS:
                     native: true
                     vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct]
                 Select Operator
-                  expressions: cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), cint (type: int)
-                  outputColumnNames: cdecimal1, cdecimal2, cint
+                  expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [1, 2, 3]
+                      projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10]
+                      selectExpressions: CastDecimalToDouble(col 1:decimal(20,10)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(20,10)) -> 6:double, CastDecimalToDouble(col 1:decimal(20,10)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(23,14)) -> 7:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 9:double) -> 10:double
                   Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+                    aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count()
                     Group By Vectorization:
-                        aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFAvgDecimal(col 1:decimal(20,10)) -> struct, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFAvgDecimal(col 2:decimal(23,14)) -> struct, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint
+                        aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col
2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 3:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - keys: cint (type: int) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -282,9 +283,9 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -301,30 +302,58 @@ STAGE PLANS: includeColumns: [1, 2, 3] dataColumns: cdouble:double, cdecimal1:decimal(20,10), cdecimal2:decimal(23,14), cint:int partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(20,10), VALUE._col2:decimal(20,10), VALUE._col3:decimal(30,10), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(23,14), VALUE._col8:decimal(23,14), VALUE._col9:decimal(33,14), VALUE._col10:double, VALUE._col11:double, 
VALUE._col12:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+            aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
+            Group By Vectorization:
+                aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 3:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 4:decimal(30,10)) -> decimal(30,10), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 9:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 10:decimal(33,14)) -> decimal(33,14), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint
+                className: VectorGroupByOperator
+                groupByMode: MERGEPARTIAL
+                keyExpressions: col 0:int
+                native: false
+                vectorProcessingMode: MERGE_PARTIAL
+                projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
             keys: KEY._col0 (type: int)
             mode: mergepartial
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
             Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (_col15 > 1L) (type: boolean)
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterLongColGreaterLongScalar(col 13:bigint, val 1)
+              predicate: (_col13 > 1L) (type: boolean)
               Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
+                expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), (CAST( _col4 AS decimal(24,14)) / _col1) (type: decimal(38,28)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), (CAST( _col10 AS decimal(27,18)) / _col7) (type: decimal(38,29)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 16, 17, 18, 7, 8, 9, 10, 24, 19, 25]
+                    selectExpressions: DecimalColDivideDecimalColumn(col 14:decimal(24,14), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 4:decimal(30,10)) -> 14:decimal(24,14), CastLongToDecimal(col 1:bigint) -> 15:decimal(19,0)) -> 16:decimal(38,28), FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 19:double) -> 18:double, IfExprNullCondExpr(col 20:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 19:double) -> 18:double, DecimalColDivideDecimalColumn(col 23:decimal(27,18), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 10:decimal(33,14)) -> 23:decimal(27,18), CastLongToDecimal(col 7:bigint) -> 15:decimal(19,0)) -> 24:decimal(38,29), FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 19:double) -> 25:double) -> 19:double) -> 25:double) -> 19:double, FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 25:double) -> 26:double) -> 25:double, IfExprNullCondExpr(col 22:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 25:double
                 Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -355,14 +384,14 @@ POSTHOOK: query: SELECT cint,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_vgby
 #### A masked pattern was here ####
--3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.811207307641183333 2604201.2704476737 2852759.5602156054
--563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.63641486490000 1426.0153418918999 2016.6902366556308 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.503273076922950000 1707.9424961538462 2415.395441814127
-253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274
-528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984
-626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603
-6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653
-762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.74432689170000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.078394999846250000 3491310.1327026924 4937458.140118758
-NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565159 6822.606289190924
+-3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.6735247296333333333333333333 2174330.209240386 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.81120730764118333333333333333 2604201.2704476737 2852759.5602156054
+-563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.6364148649000000000000000000 1426.0153418918997 2016.6902366556305 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.50327307692295000000000000000 1707.9424961538462 2415.395441814127
+253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.3399236697630859375000000000 5708.956347886203 5711.745967572781 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.42835967548079188476562500000 6837.632716002931 6840.973851172272
+528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.4109968243230468750000000000 257528.9298820665 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.96562480769168948242187500000 308443.1074570797 308593.82484083937
+626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.2939966110631835937500000000 5742.091453237337 5744.897264034264 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.32914804687497798828125000000 6877.318722794881 6880.679250101608
+6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.6757008106666666666666666667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.69651461528206666666666666667 3292794.4113115156 4032833.0678006653
+762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917000000000000000000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.07839499984625000000000000000 3491310.1327026924 4937458.140118757
+NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.6081081080666666666666666667 5695.483082135323 5696.410307714464 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.57692307692296666666666666667 6821.495748565151 6822.606289190915
 PREHOOK: query: CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS
     SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(11,5)) AS cdecimal1,
     CAST (((cdouble*9.3)/13) AS DECIMAL(16,0)) AS cdecimal2,
@@ -637,26 +666,27 @@ STAGE PLANS:
                       native: true
                       vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5), 2:cdecimal2:decimal(16,0), 3:cint:int, 4:ROW__ID:struct]
                   Select Operator
-                    expressions: cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), cint (type: int)
-                    outputColumnNames: cdecimal1, cdecimal2, cint
+                    expressions: cint (type: int), cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [1, 2, 3]
+                        projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10]
+                        selectExpressions: CastDecimalToDouble(col 1:decimal(11,5)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(11,5)) -> 6:double, CastDecimalToDouble(col 1:decimal(11,5)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(16,0)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(16,0)) -> 7:double, CastDecimalToDouble(col 2:decimal(16,0)) -> 9:double) -> 10:double
                     Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+                      aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count()
                       Group By Vectorization:
-                          aggregators: VectorUDAFCount(col 1:decimal(11,5)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 1:decimal(11,5)) -> decimal(21,5), VectorUDAFAvgDecimal(col 1:decimal(11,5)) -> struct, VectorUDAFVarDecimal(col 1:decimal(11,5)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(11,5)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(16,0)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 2:decimal(16,0)) -> decimal(26,0), VectorUDAFAvgDecimal(col 2:decimal(16,0)) -> struct, VectorUDAFVarDecimal(col 2:decimal(16,0)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(16,0)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint
+                          aggregators: VectorUDAFCount(col 1:decimal(11,5)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 1:decimal(11,5)) -> decimal(21,5), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(16,0)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 2:decimal(16,0)) -> decimal(26,0), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountStar(*) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           keyExpressions: col 3:int
                           native: false
                           vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
-                      keys: cint (type: int)
+                          projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+                      keys: _col0 (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
                       Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
@@ -667,9 +697,9 @@ STAGE PLANS:
                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+                          valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
                         Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint)
+                        value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
@@ -687,30 +717,58 @@ STAGE PLANS:
                   includeColumns: [1, 2, 3]
                   dataColumns: cdouble:double, cdecimal1:decimal(11,5), cdecimal2:decimal(16,0), cint:int
                   partitionColumnCount: 0
-                  scratchColumnTypeNames: []
+                  scratchColumnTypeNames: [double, double, double, double, double, double]
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-                vectorized: false
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 14
+                    dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0), VALUE._col8:decimal(16,0), VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+            aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
+            Group By Vectorization:
+                aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint
+                className: VectorGroupByOperator
+                groupByMode: MERGEPARTIAL
+                keyExpressions: col 0:int
+                native: false
+                vectorProcessingMode: MERGE_PARTIAL
+                projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
             keys: KEY._col0 (type: int)
             mode: mergepartial
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
            Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE
             Filter Operator
-              predicate: (_col15 > 1L) (type: boolean)
+              Filter Vectorization:
+                  className: VectorFilterOperator
+                  native: true
+                  predicateExpression: FilterLongColGreaterLongScalar(col 13:bigint, val 1)
+              predicate: (_col13 > 1L) (type: boolean)
               Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
               Select Operator
-                expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double)
+                expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), (CAST( _col4 AS decimal(15,9)) / _col1) (type: decimal(35,29)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), CAST( (CAST( _col10 AS decimal(20,4)) / _col7) AS decimal(20,4)) (type: decimal(20,4)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                Select Vectorization:
+                    className: VectorSelectOperator
+                    native: true
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 16, 17, 18, 7, 8, 9, 10, 23, 19, 25]
+                    selectExpressions: DecimalColDivideDecimalColumn(col 14:decimal(15,9), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 4:decimal(21,5)) -> 14:decimal(15,9), CastLongToDecimal(col 1:bigint) -> 15:decimal(19,0)) -> 16:decimal(35,29), FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 19:double) -> 18:double, IfExprNullCondExpr(col 20:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 19:double) -> 18:double, CastDecimalToDecimal(col 24:decimal(38,22))(children: DecimalColDivideDecimalColumn(col 23:decimal(20,4), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 10:decimal(26,0)) -> 23:decimal(20,4), CastLongToDecimal(col 7:bigint) -> 15:decimal(19,0)) -> 24:decimal(38,22)) -> 23:decimal(20,4), FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 19:double) -> 25:double) -> 19:double) -> 25:double) -> 19:double, FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 25:double) -> 26:double) -> 25:double, IfExprNullCondExpr(col 22:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 25:double
                 Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
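As a cross-check of the rewritten expressions in the two plans above, here is a minimal sketch (illustrative Java, not Hive code; the class and method names are invented) that recomputes stddev_pop and stddev_samp from the three partials the new plans ship to the reducer — sum(x*x), sum(x) and count(x) — including the n = 1 guard:

```java
/** Recomputes the stddev family from the partial aggregates the rewritten plan produces. */
public final class VarianceReduction {

  /** power(((s2 - s*s/n) / n), 0.5) -- stddev_pop, exactly as in the Select Operator above. */
  static double stddevPop(double sumSquares, double sum, long n) {
    return Math.pow((sumSquares - (sum * sum) / n) / n, 0.5);
  }

  /** Divisor is CASE WHEN n = 1 THEN null ELSE n - 1 END, so a single-row group yields null. */
  static Double stddevSamp(double sumSquares, double sum, long n) {
    if (n == 1) {
      return null; // mirrors IfExprNullCondExpr in the vectorized plan
    }
    return Math.pow((sumSquares - (sum * sum) / n) / (n - 1), 0.5);
  }

  public static void main(String[] args) {
    // Hypothetical group with three rows: 1.0, 2.0, 4.0
    double s2 = 1.0 + 4.0 + 16.0;
    double s = 1.0 + 2.0 + 4.0;
    System.out.println(stddevPop(s2, s, 3));     // population standard deviation
    System.out.println(stddevSamp(s2, s, 3));    // sample standard deviation
    System.out.println(stddevSamp(1.0, 1.0, 1)); // null, like the SQL CASE
  }
}
```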
@@ -741,14 +799,14 @@ POSTHOOK: query: SELECT cint,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_vgby_small
 #### A masked pattern was here ####
--3728 5 -515.62107 -3367.65176 -13986.22811 -2797.245622000 1140.812276 1275.466899351126 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621
--563 2 -515.62107 -3367.65176 -3883.27283 -1941.636415000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596
-253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.339923750 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613
-528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.351935137 5555.7621107931345 5558.482190324908 1024 6984454 -11710 13948890 13621.9629 308443.09823296947 308593.8156122219
-626923679 1024 9723.40270 -9778.95135 10541.05247 10.293996553 5742.091453325366 5744.897264122336 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
-6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
-762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
-NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
+-3728 5 -515.62107 -3367.65176 -13986.22811 -2797.24562200000000000000000000000 1140.8122759999992 1275.466899351125 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621
+-563 2 -515.62107 -3367.65176 -3883.27283 -1941.63641500000000000000000000000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596
+253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.33992375000000000000000000000 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613
+528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.35193513698630136986301369863 5555.762110793133 5558.482190324906 1024 6984454 -11710 13948890 13621.9629 308443.0982329696 308593.815612222
+626923679 1024 9723.40270 -9778.95135 10541.05247 10.29399655273437500000000000000 5742.091453325365 5744.897264122335 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
+6981 2 -515.62107 -515.62107 -1031.24214 -515.62107000000000000000000000000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
+762 1 1531.21941 1531.21941 1531.21941 1531.21941000000000000000000000000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
+NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.60811000000000000000000000000 5695.4830839098695 5696.410309489299 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
 PREHOOK: query: SELECT SUM(HASH(*))
 FROM (SELECT cint,
     COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1),
     AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
@@ -767,4 +825,4 @@ FROM (SELECT cint,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_vgby_small
 #### A masked pattern was here ####
-91757235680
+96966670826
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
index 18b903b3c3..50e4305b2e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
@@ -586,9 +586,9 @@ STAGE PLANS:
                         projectedOutputColumnNums: [0]
                     Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
-                      aggregations: avg(dec), sum(dec)
+                      aggregations: sum(dec), count(dec)
                       Group By Vectorization:
-                          aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10)
+                          aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           native: false
@@ -596,7 +596,7 @@ STAGE PLANS:
                           projectedOutputColumnNums: [0, 1]
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         sort order:
                         Reduce Sink Vectorization:
@@ -605,8 +605,8 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: [0, 1]
-                        Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
+                        Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -636,14 +636,14 @@ STAGE PLANS:
                 vectorized: true
                 rowBatchContext:
                     dataColumnCount: 2
-                    dataColumns: VALUE._col0:struct, VALUE._col1:decimal(30,10)
+                    dataColumns: VALUE._col0:decimal(30,10), VALUE._col1:bigint
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: avg(VALUE._col0), sum(VALUE._col1)
+            aggregations: sum(VALUE._col0), count(VALUE._col1)
             Group By Vectorization:
-                aggregators: VectorUDAFAvgDecimalFinal(col 0:struct) -> decimal(24,14), VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10)
+                aggregators: VectorUDAFSumDecimal(col 0:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 1:bigint) -> bigint
                className: VectorGroupByOperator
                groupByMode: MERGEPARTIAL
                native: false
@@ -651,17 +651,26 @@ STAGE PLANS:
                 projectedOutputColumnNums: [0, 1]
             mode: mergepartial
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
-              File Sink Vectorization:
-                  className: VectorFileSinkOperator
-                  native: false
+            Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [3, 4]
+                  selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(30,10), col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 2:decimal(19,0)) -> 3:decimal(38,18), CastDecimalToDecimal(col 0:decimal(30,10)) -> 4:decimal(30,10)
               Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -677,7 +686,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_precision
 #### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
 PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@decimal_precision
@@ -1196,9 +1205,9 @@ STAGE PLANS:
                         projectedOutputColumnNums: [0]
                     Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: avg(dec), sum(dec)
+                      aggregations: sum(dec), count(dec)
                       Group By Vectorization:
-                          aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10)
+                          aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           native: false
@@ -1206,7 +1215,7 @@ STAGE PLANS:
                           projectedOutputColumnNums: [0, 1]
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order:
                         Reduce Sink Vectorization:
@@ -1215,8 +1224,8 @@ STAGE PLANS:
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           valueColumnNums: [0, 1]
-                        Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
+                        Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
@@ -1247,14 +1256,14 @@ STAGE PLANS:
                 vectorized: true
                 rowBatchContext:
                     dataColumnCount: 2
-                    dataColumns: VALUE._col0:struct, VALUE._col1:decimal(30,10)
+                    dataColumns: VALUE._col0:decimal(30,10), VALUE._col1:bigint
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: avg(VALUE._col0), sum(VALUE._col1)
+            aggregations: sum(VALUE._col0), count(VALUE._col1)
             Group By Vectorization:
-                aggregators: VectorUDAFAvgDecimalFinal(col 0:struct) -> decimal(24,14), VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10)
+                aggregators: VectorUDAFSumDecimal(col 0:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 1:bigint) -> bigint
                 className: VectorGroupByOperator
                 groupByMode: MERGEPARTIAL
                 native: false
@@ -1262,17 +1271,26 @@ STAGE PLANS:
                 projectedOutputColumnNums: [0, 1]
             mode: mergepartial
             outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              File Sink Vectorization:
-                  className: VectorFileSinkOperator
-                  native: false
-              Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+              outputColumnNames: _col0, _col1
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [3, 4]
+                  selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(30,10), col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 2:decimal(19,0)) -> 3:decimal(38,18), CastDecimalToDecimal(col 0:decimal(30,10)) -> 4:decimal(30,10)
+              Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -1288,7 +1306,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION_txt_small
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_precision_txt_small
 #### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
 PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION_txt_small LIMIT 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@decimal_precision_txt_small
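The only data change in this file is the extra precision of the reported average. Both printed values are consistent with dividing the unchanged sum by a row count of 31 (the count is inferred from the two golden values, not stated in the diff): the old plan's VectorUDAFAvgDecimalFinal finalized the average as decimal(24,14), while the new plan divides the decimal(30,10) sum by the bigint count at decimal(38,18). A quick BigDecimal cross-check:

```java
import java.math.BigDecimal;
import java.math.RoundingMode;

public class AvgScaleCheck {
  public static void main(String[] args) {
    BigDecimal sum = new BigDecimal("2743485571.8518386284");
    BigDecimal count = new BigDecimal(31); // inferred, not stated in the diff

    // Old plan: average finalized at scale 14 (decimal(24,14))
    System.out.println(sum.divide(count, 14, RoundingMode.HALF_UP));
    // prints 88499534.57586576220645

    // New plan: sum / count evaluated at scale 18 (decimal(38,18))
    System.out.println(sum.divide(count, 18, RoundingMode.HALF_UP));
    // prints 88499534.575865762206451613
  }
}
```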
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
index 59b3c4a017..c6867f882b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
@@ -2297,19 +2297,19 @@ STAGE PLANS:
                         projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
-                      aggregations: sum(key), count(key), avg(key)
+                      aggregations: sum(key), count(key)
                       Group By Vectorization:
-                          aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint, VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct
+                          aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           keyExpressions: col 1:int
                           native: false
                           vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1, 2]
+                          projectedOutputColumnNums: [0, 1]
                       keys: value (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 18 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
@@ -2319,9 +2319,9 @@ STAGE PLANS:
                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1, 2, 3]
-                        Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct)
+                          valueColumnNums: [1, 2]
+                        Statistics: Num rows: 18 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -2350,33 +2350,33 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
-                    dataColumnCount: 4
-                    dataColumns: KEY._col0:int, VALUE._col0:decimal(30,10), VALUE._col1:bigint, VALUE._col2:struct
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:int, VALUE._col0:decimal(30,10), VALUE._col1:bigint
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2)
+            aggregations: sum(VALUE._col0), count(VALUE._col1)
             Group By Vectorization:
-                aggregators: VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(24,14)
+                aggregators: VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 2:bigint) -> bigint
                 className: VectorGroupByOperator
                 groupByMode: MERGEPARTIAL
                 keyExpressions: col 0:int
                 native: false
                 vectorProcessingMode: MERGE_PARTIAL
-                projectedOutputColumnNums: [0, 1, 2]
+                projectedOutputColumnNums: [0, 1]
             keys: KEY._col0 (type: int)
             mode: mergepartial
-            outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 17 Data size: 4012 Basic stats: COMPLETE Column stats: COMPLETE
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 17 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
-              expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10))
+              expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), (CAST( _col1 AS decimal(24,14)) / _col2) (type: decimal(38,28)), _col1 (type: decimal(30,10))
               outputColumnNames: _col0, _col1, _col2, _col3
              Select Vectorization:
                   className: VectorSelectOperator
                   native: true
-                  projectedOutputColumnNums: [0, 5, 3, 1]
-                  selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(30,10), col 4:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,18)
+                  projectedOutputColumnNums: [0, 4, 6, 1]
+                  selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(30,10), col 3:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 4:decimal(38,18), DecimalColDivideDecimalColumn(col 5:decimal(24,14), col 3:decimal(19,0))(children: CastDecimalToDecimal(col 1:decimal(30,10)) -> 5:decimal(24,14), CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 6:decimal(38,28)
               Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
@@ -2386,9 +2386,9 @@ STAGE PLANS:
                   keyColumnNums: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  valueColumnNums: [5, 3, 1]
+                  valueColumnNums: [4, 6, 1]
                 Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
-                value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10))
+                value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(38,28)), _col3 (type: decimal(30,10))
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -2401,12 +2401,12 @@ STAGE PLANS:
                 vectorized: true
                 rowBatchContext:
                     dataColumnCount: 4
-                    dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18), VALUE._col1:decimal(24,14), VALUE._col2:decimal(30,10)
+                    dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,28), VALUE._col2:decimal(30,10)
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reduce Operator Tree:
           Select Operator
-            expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10))
+            expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(38,28)), VALUE._col2 (type: decimal(30,10))
             outputColumnNames: _col0, _col1, _col2, _col3
             Select Vectorization:
                 className: VectorSelectOperator
@@ -2438,23 +2438,23 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf
 #### A masked pattern was here ####
--1 -1.120000000000000000 -1.12000000000000 -2.2400000000
--11 -1.122000000000000000 -1.12200000000000 -1.1220000000
--1234567890 -1234567890.123456789000000000 -1234567890.12345678900000 -1234567890.1234567890
--1255 -1255.490000000000000000 -1255.49000000000000 -1255.4900000000
-0 0.025384615384615385 0.02538461538462 0.3300000000
-1 1.048400000000000000 1.04840000000000 5.2420000000
-10 10.000000000000000000 10.00000000000000 10.0000000000
-100 100.000000000000000000 100.00000000000000 100.0000000000
-1234567890 1234567890.123456780000000000 1234567890.12345678000000 1234567890.1234567800
-124 124.000000000000000000 124.00000000000000 124.0000000000
-125 125.200000000000000000 125.20000000000000 125.2000000000
-2 2.000000000000000000 2.00000000000000 4.0000000000
-20 20.000000000000000000 20.00000000000000 20.0000000000
-200 200.000000000000000000 200.00000000000000 200.0000000000
-3 3.140000000000000000 3.14000000000000 9.4200000000
-4 3.140000000000000000 3.14000000000000 3.1400000000
-4400 -4400.000000000000000000 -4400.00000000000000 -4400.0000000000
+-1 -1.120000000000000000 -1.1200000000000000000000000000 -2.2400000000
+-11 -1.122000000000000000 -1.1220000000000000000000000000 -1.1220000000
+-1234567890 -1234567890.123456789000000000 -1234567890.1234567890000000000000000000 -1234567890.1234567890
+-1255 -1255.490000000000000000 -1255.4900000000000000000000000000 -1255.4900000000
+0 0.025384615384615385 0.0253846153846153846153846154 0.3300000000
+1 1.048400000000000000 1.0484000000000000000000000000 5.2420000000
+10 10.000000000000000000 10.0000000000000000000000000000 10.0000000000
+100 100.000000000000000000 100.0000000000000000000000000000 100.0000000000
+1234567890 1234567890.123456780000000000 1234567890.1234567800000000000000000000 1234567890.1234567800
+124 124.000000000000000000 124.0000000000000000000000000000 124.0000000000
+125 125.200000000000000000 125.2000000000000000000000000000 125.2000000000
+2 2.000000000000000000 2.0000000000000000000000000000 4.0000000000
+20 20.000000000000000000 20.0000000000000000000000000000 20.0000000000
+200 200.000000000000000000 200.0000000000000000000000000000 200.0000000000
+3 3.140000000000000000 3.1400000000000000000000000000 9.4200000000
+4 3.140000000000000000 3.1400000000000000000000000000 3.1400000000
+4400 -4400.000000000000000000 -4400.0000000000000000000000000000 -4400.0000000000
 NULL NULL NULL NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT -key FROM DECIMAL_UDF
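In the stddev/variance hunks that follow, the changed double columns (e.g. 0.050900082840236685 vs 0.05090008284023669) differ only in the last ulp or two. That is the expected consequence of evaluating the variance as sums of doubles in a different order than the old streaming evaluator did. A generic illustration (not Hive code) of why reassociating a double computation moves the last digits:

```java
public class DoubleDriftDemo {
  public static void main(String[] args) {
    // Same three addends, two association orders, two different doubles:
    System.out.println((0.1 + 0.2) + 0.3); // 0.6000000000000001
    System.out.println(0.1 + (0.2 + 0.3)); // 0.6
  }
}
```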
@@ -3229,27 +3229,28 @@ STAGE PLANS:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
                   Select Operator
-                    expressions: key (type: decimal(20,10)), value (type: int)
-                    outputColumnNames: key, value
+                    expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0, 1]
+                        projectedOutputColumnNums: [1, 0, 3, 6]
+                        selectExpressions: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 4:double, CastDecimalToDouble(col 0:decimal(20,10)) -> 5:double) -> 6:double
                     Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
-                      aggregations: stddev(key), variance(key)
+                      aggregations: sum(_col3), sum(_col2), count(_col1)
                       Group By Vectorization:
-                          aggregators: VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: stddev, VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: variance
+                          aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(20,10)) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           keyExpressions: col 1:int
                           native: false
                           vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1]
-                      keys: value (type: int)
+                          projectedOutputColumnNums: [0, 1, 2]
+                      keys: _col0 (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
@@ -3259,9 +3260,9 @@ STAGE PLANS:
                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1, 2]
-                        Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: struct), _col2 (type: struct)
+                          valueColumnNums: [1, 2, 3]
+                        Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -3278,28 +3279,56 @@ STAGE PLANS:
                   includeColumns: [0, 1]
                   dataColumns: key:decimal(20,10), value:int
                   partitionColumnCount: 0
-                  scratchColumnTypeNames: []
+                  scratchColumnTypeNames: [double, double, double, double]
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-                vectorized: false
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: stddev(VALUE._col0), variance(VALUE._col1)
+            aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
+            Group By Vectorization:
+                aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
+                className: VectorGroupByOperator
+                groupByMode: MERGEPARTIAL
+                keyExpressions: col 0:int
+                native: false
+                vectorProcessingMode: MERGE_PARTIAL
+                projectedOutputColumnNums: [0, 1, 2]
             keys: KEY._col0 (type: int)
             mode: mergepartial
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 17 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 4, 6]
+                  selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 6:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 6:double) -> 5:double) -> 6:double
              Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
@@ -3319,8 +3348,8 @@ POSTHOOK: Input: default@decimal_udf
 -11 0.0 0.0
 -1234567890 0.0 0.0
 -1255 0.0 0.0
-0 0.22561046704494161 0.050900082840236685
-1 0.05928102563215321 0.0035142400000000066
+0 0.22561046704494161 0.05090008284023669
+1 0.05928102563215448 0.003514240000000157
 10 0.0 0.0
 100 0.0 0.0
 1234567890 0.0 0.0
@@ -3364,27 +3393,28 @@ STAGE PLANS:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
                   Select Operator
-                    expressions: key (type: decimal(20,10)), value (type: int)
-                    outputColumnNames: key, value
+                    expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0, 1]
+                        projectedOutputColumnNums: [1, 0, 3, 6]
+                        selectExpressions: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 4:double, CastDecimalToDouble(col 0:decimal(20,10)) -> 5:double) -> 6:double
                     Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
-                      aggregations: stddev_samp(key), var_samp(key)
+                      aggregations: sum(_col3), sum(_col2), count(_col1)
                       Group By Vectorization:
-                          aggregators: VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: stddev_samp, VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: var_samp
+                          aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(20,10)) -> bigint
                          className: VectorGroupByOperator
                           groupByMode: HASH
                           keyExpressions: col 1:int
                           native: false
                           vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1]
-                      keys: value (type: int)
+                          projectedOutputColumnNums: [0, 1, 2]
+                      keys: _col0 (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
@@ -3394,9 +3424,9 @@ STAGE PLANS:
                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1, 2]
-                        Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
-                        value expressions: _col1 (type: struct), _col2 (type: struct)
+                          valueColumnNums: [1, 2, 3]
+                        Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -3413,28 +3443,56 @@ STAGE PLANS:
                   includeColumns: [0, 1]
                   dataColumns: key:decimal(20,10), value:int
                   partitionColumnCount: 0
-                  scratchColumnTypeNames: []
+                  scratchColumnTypeNames: [double, double, double, double]
         Reducer 2
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
-                vectorized: false
+                reduceColumnNullOrder: a
+                reduceColumnSortOrder: +
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 4
+                    dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
         Reduce Operator Tree:
          Group By Operator
-            aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
+            aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
+            Group By Vectorization:
+                aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
+                className: VectorGroupByOperator
+                groupByMode: MERGEPARTIAL
+                keyExpressions: col 0:int
+                native: false
+                vectorProcessingMode: MERGE_PARTIAL
+                projectedOutputColumnNums: [0, 1, 2]
             keys: KEY._col0 (type: int)
             mode: mergepartial
-            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
-            File Output Operator
-              compressed: false
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 17 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE
+            Select Operator
+              expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double)
+              outputColumnNames: _col0, _col1, _col2
+              Select Vectorization:
+                  className: VectorSelectOperator
+                  native: true
+                  projectedOutputColumnNums: [0, 4, 9]
+                  selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double, IfExprNullCondExpr(col 6:boolean, null, col 7:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 6:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 7:bigint) -> 8:bigint) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 9:double) -> 5:double, IfExprNullCondExpr(col 8:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 9:double
              Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                File Sink Vectorization:
+                    className: VectorFileSinkOperator
+                    native: false
+                Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Fetch Operator
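Both hunks above show the same two-phase shape: the map-side HASH group-by emits (sum(x*x), sum(x), count) partials, and the reducer's MERGE_PARTIAL group-by merges them by plain addition before the final Select applies the closed-form expression. A minimal sketch of that merge step (illustrative only; the type and method names are invented, not Hive's):

```java
/** The three partials the rewritten plans exchange between map and reduce tasks. */
final class VarPartial {
  final double sumSq; // sum(x * x)
  final double sum;   // sum(x)
  final long count;   // count(x)

  VarPartial(double sumSq, double sum, long count) {
    this.sumSq = sumSq;
    this.sum = sum;
    this.count = count;
  }

  /** MERGE_PARTIAL: every component merges by simple addition. */
  VarPartial merge(VarPartial other) {
    return new VarPartial(sumSq + other.sumSq, sum + other.sum, count + other.count);
  }

  /** Final var_samp, with the n = 1 -> NULL guard from the plan. */
  Double varSamp() {
    return count == 1 ? null : (sumSq - (sum * sum) / count) / (count - 1);
  }
}
```

This is also why the results below report NULL stddev_samp/var_samp for single-row groups: the CASE WHEN divisor becomes NULL when count is 1.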
@@ -3454,8 +3512,8 @@ POSTHOOK: Input: default@decimal_udf
 -11 NULL NULL
 -1234567890 NULL NULL
 -1255 NULL NULL
-0 0.2348228191855647 0.055141756410256405
-1 0.06627820154470102 0.004392800000000008
+0 0.23482281918556472 0.05514175641025642
+1 0.06627820154470243 0.0043928000000001965
 10 NULL NULL
 100 NULL NULL
 1234567890 NULL NULL
@@ -6235,18 +6293,18 @@ STAGE PLANS:
                         projectedOutputColumnNums: [0, 1]
                     Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: sum(key), count(key), avg(key)
+                      aggregations: sum(key), count(key)
                       Group By Vectorization:
-                          aggregators: VectorUDAFSumDecimal(col 0:decimal(15,3)) -> decimal(25,3), VectorUDAFCount(col 0:decimal(15,3)) -> bigint, VectorUDAFAvgDecimal(col 0:decimal(15,3)) -> struct
+                          aggregators: VectorUDAFSumDecimal(col 0:decimal(15,3)) -> decimal(25,3), VectorUDAFCount(col 0:decimal(15,3)) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           keyExpressions: col 1:int
                           native: false
                           vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1, 2]
+                          projectedOutputColumnNums: [0, 1]
                       keys: value (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
@@ -6257,9 +6315,9 @@ STAGE PLANS:
                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1, 2, 3]
+                          valueColumnNums: [1, 2]
                         Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint), _col3 (type: struct)
+                        value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
             Map Vectorization:
@@ -6289,33 +6347,33 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
                 rowBatchContext:
-                    dataColumnCount: 4
-                    dataColumns: KEY._col0:int, VALUE._col0:decimal(25,3), VALUE._col1:bigint, VALUE._col2:struct
+                    dataColumnCount: 3
+                    dataColumns: KEY._col0:int, VALUE._col0:decimal(25,3), VALUE._col1:bigint
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reduce Operator Tree:
           Group By Operator
-            aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2)
+            aggregations: sum(VALUE._col0), count(VALUE._col1)
             Group By Vectorization:
-                aggregators: VectorUDAFSumDecimal(col 1:decimal(25,3)) -> decimal(25,3), VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(19,7)
+                aggregators: VectorUDAFSumDecimal(col 1:decimal(25,3)) -> decimal(25,3), VectorUDAFCountMerge(col 2:bigint) -> bigint
                 className: VectorGroupByOperator
                 groupByMode: MERGEPARTIAL
                 keyExpressions: col 0:int
                 native: false
                 vectorProcessingMode: MERGE_PARTIAL
-                projectedOutputColumnNums: [0, 1, 2]
+                projectedOutputColumnNums: [0, 1]
             keys: KEY._col0 (type: int)
             mode: mergepartial
-            outputColumnNames: _col0, _col1, _col2, _col3
+            outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,16)), _col3 (type: decimal(19,7)), _col1 (type: decimal(25,3))
+              expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,16)), (CAST( _col1 AS decimal(19,7)) / _col2) (type: decimal(38,26)), _col1 (type: decimal(25,3))
              outputColumnNames: _col0, _col1, _col2, _col3
              Select Vectorization:
                   className: VectorSelectOperator
                   native: true
-                  projectedOutputColumnNums: [0, 5, 3, 1]
-                  selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(25,3), col 4:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,16)
+                  projectedOutputColumnNums: [0, 4, 6, 1]
+                  selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(25,3), col 3:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 4:decimal(38,16), DecimalColDivideDecimalColumn(col 5:decimal(19,7), col 3:decimal(19,0))(children: CastDecimalToDecimal(col 1:decimal(25,3)) -> 5:decimal(19,7), CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 6:decimal(38,26)
               Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
@@ -6325,9 +6383,9 @@ STAGE PLANS:
                   keyColumnNums: [0]
                   native: true
                   nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                  valueColumnNums: [5, 3, 1]
+                  valueColumnNums: [4, 6, 1]
                 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
-                value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(19,7)), _col3 (type: decimal(25,3))
+                value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(38,26)), _col3 (type: decimal(25,3))
         Reducer 3
             Execution mode: vectorized, llap
             Reduce Vectorization:
@@ -6340,12 +6398,12 @@ STAGE PLANS:
                 vectorized: true
                 rowBatchContext:
                     dataColumnCount: 4
-                    dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,16), VALUE._col1:decimal(19,7), VALUE._col2:decimal(25,3)
+                    dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,16), VALUE._col1:decimal(38,26), VALUE._col2:decimal(25,3)
                     partitionColumnCount: 0
                     scratchColumnTypeNames: []
         Reduce Operator Tree:
           Select Operator
-            expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,16)), VALUE._col1 (type: decimal(19,7)), VALUE._col2 (type: decimal(25,3))
+            expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,16)), VALUE._col1 (type: decimal(38,26)), VALUE._col2 (type: decimal(25,3))
             outputColumnNames: _col0, _col1, _col2, _col3
             Select Vectorization:
                 className: VectorSelectOperator
@@ -6377,23 +6435,23 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@decimal_udf_txt_small
 #### A masked pattern was here ####
--1 -1.1200000000000000 -1.1200000 -2.240
--11 -1.1220000000000000 -1.1220000 -1.122
--1234567890 -1234567890.1230000000000000 -1234567890.1230000 -1234567890.123
--1255 -1255.4900000000000000 -1255.4900000 -1255.490
-0 0.0253846153846154 0.0253846 0.330
-1 1.0484000000000000 1.0484000 5.242
-10 10.0000000000000000 10.0000000 10.000
-100 100.0000000000000000 100.0000000 100.000
-1234567890 1234567890.1230000000000000 1234567890.1230000 1234567890.123
-124 124.0000000000000000 124.0000000 124.000
-125 125.2000000000000000 125.2000000 125.200
-2 2.0000000000000000 2.0000000 4.000
-20 20.0000000000000000 20.0000000 20.000
-200 200.0000000000000000 200.0000000 200.000
-3 3.1400000000000000 3.1400000 9.420
-4 3.1400000000000000 3.1400000 3.140
-4400 -4400.0000000000000000 -4400.0000000 -4400.000
+-1 -1.1200000000000000 -1.12000000000000000000000000 -2.240
+-11 -1.1220000000000000 -1.12200000000000000000000000 -1.122
+-1234567890 -1234567890.1230000000000000 -1234567890.12300000000000000000000000 -1234567890.123
+-1255 -1255.4900000000000000 -1255.49000000000000000000000000 -1255.490
+0 0.0253846153846154 0.02538461538461538461538462 0.330
+1 1.0484000000000000 1.04840000000000000000000000 5.242
+10 10.0000000000000000 10.00000000000000000000000000 10.000
+100 100.0000000000000000 100.00000000000000000000000000 100.000
+1234567890 1234567890.1230000000000000 1234567890.12300000000000000000000000 1234567890.123
+124 124.0000000000000000 124.00000000000000000000000000 124.000
+125 125.2000000000000000 125.20000000000000000000000000 125.200
+2 2.0000000000000000 2.00000000000000000000000000 4.000
+20 20.0000000000000000 20.00000000000000000000000000 20.000
+200 200.0000000000000000 200.00000000000000000000000000 200.000
+3 3.1400000000000000 3.14000000000000000000000000 9.420
+4 3.1400000000000000 3.14000000000000000000000000 3.140
+4400 -4400.0000000000000000 -4400.00000000000000000000000000 -4400.000
 NULL NULL NULL NULL
 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT -key FROM DECIMAL_UDF_txt_small
@@ -7173,26 +7231,27 @@ STAGE PLANS:
                       native: true
                       vectorizationSchemaColumns: [0:key:decimal(15,3), 1:value:int, 2:ROW__ID:struct]
                   Select Operator
-                    expressions: key (type: decimal(15,3)), value (type: int)
-                    outputColumnNames: key, value
+                    expressions: value (type: int), key (type: decimal(15,3)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3
                     Select Vectorization:
                         className: VectorSelectOperator
                         native: true
-                        projectedOutputColumnNums: [0, 1]
+                        projectedOutputColumnNums: [1, 0, 3, 6]
+                        selectExpressions: CastDecimalToDouble(col 0:decimal(15,3)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(15,3)) -> 4:double, CastDecimalToDouble(col 0:decimal(15,3)) -> 5:double) -> 6:double
                     Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: stddev(key), variance(key)
+                      aggregations: sum(_col3), sum(_col2), count(_col1)
                       Group By Vectorization:
-                          aggregators: VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: stddev, VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: variance
+                          aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(15,3)) -> bigint
                           className: VectorGroupByOperator
                           groupByMode: HASH
                           keyExpressions: col 1:int
                           native: false
                           vectorProcessingMode: HASH
-                          projectedOutputColumnNums: [0, 1]
-                      keys: value (type: int)
+                          projectedOutputColumnNums: [0, 1, 2]
+                      keys: _col0 (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
@@ -7203,9 +7262,9 @@ STAGE PLANS:
                           keyColumnNums: [0]
                           native: true
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          valueColumnNums: [1, 2]
+                          valueColumnNums: [1, 2, 3]
stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -7223,28 +7282,56 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: key:decimal(15,3), value:int partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev(VALUE._col0), variance(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4, 6] + selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 6:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 6:double) -> 5:double) -> 6:double Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -7264,8 +7351,8 @@ POSTHOOK: Input: default@decimal_udf_txt_small -11 0.0 0.0 -1234567890 0.0 0.0 -1255 0.0 0.0 -0 0.22561046704494161 0.050900082840236685 -1 0.05928102563215321 0.0035142400000000066 +0 0.22561046704494161 0.05090008284023669 +1 0.05928102563215448 0.003514240000000157 10 0.0 0.0 100 0.0 0.0 1234567890 0.0 0.0 @@ -7309,26 +7396,27 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:key:decimal(15,3), 1:value:int, 2:ROW__ID:struct] Select Operator - expressions: key (type: decimal(15,3)), value (type: int) - outputColumnNames: key, value + expressions: value (type: int), key (type: decimal(15,3)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [1, 0, 3, 6] + selectExpressions: CastDecimalToDouble(col 0:decimal(15,3)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(15,3)) -> 4:double, CastDecimalToDouble(col 0:decimal(15,3)) -> 5:double) -> 6:double Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(key), var_samp(key) + aggregations: sum(_col3), sum(_col2), count(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: stddev_samp, VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: var_samp + aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(15,3)) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] - keys: value (type: int) + projectedOutputColumnNums: [0, 1, 2] + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -7339,9 +7427,9 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumnNums: [1, 2, 3] Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -7359,28 +7447,56 @@ STAGE PLANS: includeColumns: [0, 1] dataColumns: key:decimal(15,3), value:int partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: 
GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 4, 9] + selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double, IfExprNullCondExpr(col 6:boolean, null, col 7:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 6:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 7:bigint) -> 8:bigint) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 9:double) -> 5:double, IfExprNullCondExpr(col 8:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 9:double Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 
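[editor's note -- not part of the patch] The plans above capture the heart of this change: stddev/variance aggregates are reduced to three commutative partials -- sum(x*x), sum(x), count(x) -- which lets the reduce side vectorize; the final Select then reassembles the value as (sum_x2 - sum_x*sum_x/n) / n for the population variant, divides by (n - 1) for the sample variant with the CASE WHEN guard returning NULL when n = 1, and wraps the variance in power(..., 0.5) for stddev. The standalone Java sketch below shows that algebra only; the class name and input values are hypothetical, and the small last-digit drift visible in the updated result rows above is consistent with this sum-of-squares form being computed on a different numeric path than the old struct-based evaluators.

// Hypothetical sketch (not patch code): the variance algebra the rewritten
// plans evaluate after merging sum(x*x), sum(x), count(x).
public final class VarianceDecomposition {

  /** var_pop: (sum_x2 - sum_x*sum_x/n) / n, as in the plan expression. */
  static double varPop(double sumX2, double sumX, long n) {
    return (sumX2 - (sumX * sumX) / n) / n;
  }

  /** var_samp: same numerator over (n - 1); NULL for a single row, mirroring
   *  CASE WHEN (_col3 = 1L) THEN (null) ELSE (_col3 - 1) END. */
  static Double varSamp(double sumX2, double sumX, long n) {
    return n == 1 ? null : (sumX2 - (sumX * sumX) / n) / (n - 1);
  }

  public static void main(String[] args) {
    double[] xs = {1.0, 1.1, 1.2};   // hypothetical input column
    double sumX = 0, sumX2 = 0;      // the map-side partials
    for (double x : xs) {
      sumX += x;
      sumX2 += x * x;
    }
    long n = xs.length;              // the count partial
    System.out.println("var_pop    = " + varPop(sumX2, sumX, n));
    System.out.println("stddev_pop = " + Math.pow(varPop(sumX2, sumX, n), 0.5));
    System.out.println("var_samp   = " + varSamp(sumX2, sumX, n));
  }
}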
Fetch Operator @@ -7400,8 +7516,8 @@ POSTHOOK: Input: default@decimal_udf_txt_small -11 NULL NULL -1234567890 NULL NULL -1255 NULL NULL -0 0.2348228191855647 0.055141756410256405 -1 0.06627820154470102 0.004392800000000008 +0 0.23482281918556472 0.05514175641025642 +1 0.06627820154470243 0.0043928000000001965 10 NULL NULL 100 NULL NULL 1234567890 NULL NULL diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out index 80ecd59a16..0eca143451 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -69,24 +69,24 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c), count() + aggregations: sum(c), count(c), count() keys: a (type: string), b (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Vector aggregation : "avg" for input type: "BYTES" and output type: "STRUCT" and mode: PARTIAL1 not supported for evaluator GenericUDAFAverageEvaluatorDouble + notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble vectorized: false Reducer 2 Execution mode: vectorized, llap @@ -99,33 +99,34 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 5 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint + aggregators: VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:string, col 1:string, col 2:bigint native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - 
outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 5, 4] + selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 5:double Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -177,24 +178,24 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c), count() + aggregations: sum(c), count(c), count() keys: a (type: string), b (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Vector aggregation : "avg" for input type: "BYTES" and output type: "STRUCT" and mode: PARTIAL1 not supported for evaluator GenericUDAFAverageEvaluatorDouble + notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble vectorized: false Reducer 2 Execution mode: vectorized, llap @@ -207,33 +208,34 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 5 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint + aggregators: VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:string, col 1:string, col 2:bigint native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] + 
projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 5, 4] + selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 5:double Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -311,24 +313,24 @@ STAGE PLANS: outputColumnNames: a, b, c Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c), count() + aggregations: sum(c), count(c), count() keys: a (type: string), b (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: bigint) + value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint) Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - notVectorizedReason: GROUPBY operator: Vector aggregation : "avg" for input type: "BYTES" and output type: "STRUCT" and mode: PARTIAL1 not supported for evaluator GenericUDAFAverageEvaluatorDouble + notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble vectorized: false Reducer 2 Execution mode: vectorized, llap @@ -341,24 +343,24 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 5 + dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [bigint] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgPartial2(col 2:struct) -> struct, VectorUDAFCountMerge(col 3:bigint) -> bigint + aggregators: VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint className: VectorGroupByOperator groupByMode: PARTIALS - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + keyExpressions: col 0:string, col 
1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), 0L (type: bigint) mode: partials - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) @@ -369,9 +371,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumnNums: [3, 4, 5] Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -383,33 +385,34 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 5 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint + aggregators: VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint className: VectorGroupByOperator groupByMode: FINAL keyExpressions: col 0:string, col 1:string, col 2:bigint native: false vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: final - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 5, 4] + selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 5:double Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index dabc987e6b..74caa3fee9 100644 --- 
a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -76,18 +76,18 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c_dec), count() + aggregations: sum(c_dec), count(c_dec), count() Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFSumDecimal(col 2:decimal(10,2)) -> decimal(20,2), VectorUDAFCount(col 2:decimal(10,2)) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) @@ -98,9 +98,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumnNums: [3, 4, 5] Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -129,33 +129,34 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 5 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint + aggregators: VectorUDAFSumDecimal(col 3:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:string, col 1:string, col 2:bigint native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: decimal(14,6)), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: decimal(38,20)), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 6, 4] + selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(20,2), col 5:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 5:decimal(19,0)) -> 6:decimal(38,20) Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -214,18 +215,18 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c_dec), count() + aggregations: sum(c_dec), count(c_dec), count() Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFSumDecimal(col 2:decimal(10,2)) -> decimal(20,2), VectorUDAFCount(col 2:decimal(10,2)) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) @@ -236,9 +237,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumnNums: [3, 4, 5] Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -267,33 +268,34 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 5 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint + aggregators: VectorUDAFSumDecimal(col 3:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:string, col 1:string, col 
2:bigint native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: decimal(38,20)), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 6, 4] + selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(20,2), col 5:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 5:decimal(19,0)) -> 6:decimal(38,20) Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -321,22 +323,22 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### a b _c2 _c3 -1 1 3.000000 2 -1 2 2.000000 1 -1 NULL 2.666667 3 -2 2 5.333333 3 -2 3 5.000000 2 -2 NULL 5.200000 5 -3 2 8.000000 1 -3 NULL 8.000000 1 -5 1 2.000000 1 -5 NULL 2.000000 1 -8 1 1.000000 2 -8 NULL 1.000000 2 -NULL 1 2.000000 5 -NULL 2 5.200000 5 -NULL 3 5.000000 2 -NULL NULL 3.833333 12 +1 1 3.00000000000000000000 2 +1 2 2.00000000000000000000 1 +1 NULL 2.66666666666666666667 3 +2 2 5.33333333333333333333 3 +2 3 5.00000000000000000000 2 +2 NULL 5.20000000000000000000 5 +3 2 8.00000000000000000000 1 +3 NULL 8.00000000000000000000 1 +5 1 2.00000000000000000000 1 +5 NULL 2.00000000000000000000 1 +8 1 1.00000000000000000000 2 +8 NULL 1.00000000000000000000 2 +NULL 1 2.00000000000000000000 5 +NULL 2 5.20000000000000000000 5 +NULL 3 5.00000000000000000000 2 +NULL NULL 3.83333333333333333333 12 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube PREHOOK: type: QUERY @@ -378,18 +380,18 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c_dec), count() + aggregations: sum(c_dec), count(c_dec), count() Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFSumDecimal(col 2:decimal(10,2)) -> decimal(20,2), VectorUDAFCount(col 2:decimal(10,2)) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string, col 1:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: a (type: string), b (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -400,9 +402,9 @@ STAGE PLANS: keyColumnNums: [0, 1] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + valueColumnNums: [2, 3, 4] Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: bigint) + value expressions: _col2 (type: decimal(20,2)), _col3 (type: bigint), _col4 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -431,24 +433,24 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 5 + dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [bigint] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgDecimalPartial2(col 2:struct) -> struct, VectorUDAFCountMerge(col 3:bigint) -> bigint + aggregators: VectorUDAFSumDecimal(col 2:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint className: VectorGroupByOperator groupByMode: PARTIALS - keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint + keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint native: false vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), 0L (type: bigint) mode: partials - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) @@ -459,9 +461,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumnNums: [3, 4, 5] Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: bigint) + value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -473,33 +475,34 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 5 - dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint + dataColumnCount: 6 + dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) Group By Vectorization: - aggregators: VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint + aggregators: VectorUDAFSumDecimal(col 3:decimal(20,2)) -> 
decimal(20,2), VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint className: VectorGroupByOperator groupByMode: FINAL keyExpressions: col 0:string, col 1:string, col 2:bigint native: false vectorProcessingMode: STREAMING - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0, 1, 2] keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: final - outputColumnNames: _col0, _col1, _col3, _col4 + outputColumnNames: _col0, _col1, _col3, _col4, _col5 Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint) + expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: decimal(38,20)), _col5 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 6, 4] + selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(20,2), col 5:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 5:decimal(19,0)) -> 6:decimal(38,20) Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -527,19 +530,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 #### A masked pattern was here #### a b _c2 _c3 -1 1 3.000000 2 -1 2 2.000000 1 -1 NULL 2.666667 3 -2 2 5.333333 3 -2 3 5.000000 2 -2 NULL 5.200000 5 -3 2 8.000000 1 -3 NULL 8.000000 1 -5 1 2.000000 1 -5 NULL 2.000000 1 -8 1 1.000000 2 -8 NULL 1.000000 2 -NULL 1 2.000000 5 -NULL 2 5.200000 5 -NULL 3 5.000000 2 -NULL NULL 3.833333 12 +1 1 3.00000000000000000000 2 +1 2 2.00000000000000000000 1 +1 NULL 2.66666666666666666667 3 +2 2 5.33333333333333333333 3 +2 3 5.00000000000000000000 2 +2 NULL 5.20000000000000000000 5 +3 2 8.00000000000000000000 1 +3 NULL 8.00000000000000000000 1 +5 1 2.00000000000000000000 1 +5 NULL 2.00000000000000000000 1 +8 1 1.00000000000000000000 2 +8 NULL 1.00000000000000000000 2 +NULL 1 2.00000000000000000000 5 +NULL 2 5.20000000000000000000 5 +NULL 3 5.00000000000000000000 2 +NULL NULL 3.83333333333333333333 12 diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out index c21a4fc091..d90ebf0634 100644 --- a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out @@ -822,9 +822,9 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 6 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) + aggregations: sum(_col1), sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFSumLong(col 1:int) -> bigint, VectorUDAFAvgLong(col 1:int) -> struct, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFAvgDouble(col 2:double) -> struct, VectorUDAFSumDecimal(col 3:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimal(col 3:decimal(38,18)) -> struct + aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFSumLong(col 1:int) -> bigint, VectorUDAFCount(col 1:int) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, 
VectorUDAFCount(col 2:double) -> bigint, VectorUDAFSumDecimal(col 3:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 3:decimal(38,18)) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: ConstantVectorExpression(val 1) -> 4:int @@ -834,7 +834,7 @@ STAGE PLANS: keys: 1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -843,8 +843,8 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct), _col4 (type: double), _col5 (type: struct), _col6 (type: decimal(38,18)), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: decimal(38,18)), _col7 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -855,9 +855,9 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), sum(VALUE._col5), avg(VALUE._col6) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFAvgFinal(col 5:struct) -> double, VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 7:struct) -> decimal(38,18) + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFCountMerge(col 7:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:int @@ -867,15 +867,15 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), (UDFToDouble(_col2) / _col3) (type: double), _col4 (type: double), (_col4 / _col5) (type: double), _col6 (type: decimal(38,18)), (_col6 / _col7) (type: decimal(38,18)) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 1, 2, 3, 4, 5, 6, 7] - selectExpressions: ConstantVectorExpression(val 1) -> 8:int + projectedOutputColumnNums: [8, 1, 2, 10, 4, 9, 6, 12] + selectExpressions: ConstantVectorExpression(val 1) -> 8:int, DoubleColDivideLongColumn(col 9:double, col 3:bigint)(children: CastLongToDouble(col 2:bigint) -> 9:double) -> 10:double, DoubleColDivideLongColumn(col 4:double, col 5:bigint) -> 9:double, DecimalColDivideDecimalColumn(col 6:decimal(38,18), col 11:decimal(19,0))(children: CastLongToDecimal(col 7:bigint) -> 11:decimal(19,0)) -> 12:decimal(38,18) Statistics: Num rows: 1 Data size: 268 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1046,9 +1046,9 @@ STAGE PLANS: projectedOutputColumnNums: [1, 0, 2, 3, 4] Statistics: Num rows: 500 Data size: 66000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) + aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFAvgLongComplete(col 2:int) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFAvgDoubleComplete(col 3:double) -> double, VectorUDAFSumDecimal(col 4:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimalComplete(col 4:decimal(38,18)) -> decimal(38,18) + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 3:double) -> bigint, VectorUDAFSumDecimal(col 4:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 4:decimal(38,18)) -> bigint className: VectorGroupByOperator groupByMode: COMPLETE keyExpressions: col 0:int, col 1:int @@ -1058,15 +1058,16 @@ STAGE PLANS: keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), (UDFToDouble(_col2) / _col3) (type: double), _col4 (type: double), (_col4 / _col5) (type: double), _col6 (type: decimal(38,18)), (_col6 / _col7) (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] - Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE + projectedOutputColumnNums: [0, 1, 2, 9, 4, 8, 6, 11] + selectExpressions: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: CastLongToDouble(col 2:bigint) -> 8:double) -> 9:double, DoubleColDivideLongColumn(col 4:double, col 5:bigint) -> 8:double, DecimalColDivideDecimalColumn(col 6:decimal(38,18), col 10:decimal(19,0))(children: CastLongToDecimal(col 7:bigint) -> 10:decimal(19,0)) -> 11:decimal(38,18) + Statistics: Num rows: 500 Data size: 132000 Basic stats: COMPLETE Column stats: COMPLETE 
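[editor's note -- not part of the patch] The grouping-sets and groupby-reduce plans show the companion rewrite for avg: avg(x) becomes sum(x) plus count(x) in the partial stages and a single division in the final Select, with integer sums cast to double first (UDFToDouble(_col2) / _col3) and decimal sums divided under the planner's widened types (e.g. decimal(20,2) / decimal(19,0) -> decimal(38,20) in the grouping-sets output, hence the new scale-20 golden rows). A minimal sketch under those assumptions follows; the class name is hypothetical, and the decimal values are chosen to mirror the NULL NULL grand-total row above (count 12, avg 3.8333..., so sum 46.00).

import java.math.BigDecimal;
import java.math.RoundingMode;

// Hypothetical sketch (not patch code): avg reconstructed from sum/count.
public final class AvgFromSumCount {
  public static void main(String[] args) {
    // Integer column: the plan casts the sum to double before dividing,
    // mirroring (UDFToDouble(_col2) / _col3).
    long sumInt = 21;
    long cnt = 6;
    double avgDouble = (double) sumInt / cnt;   // 3.5

    // Decimal column: decimal(10,2) sums to decimal(20,2); dividing by the
    // count (cast to decimal(19,0)) yields a wide-scale quotient, as in the
    // updated golden rows. 46.00 / 12 reproduces the grand-total row.
    BigDecimal sumDec = new BigDecimal("46.00");
    BigDecimal avgDec = sumDec.divide(BigDecimal.valueOf(12), 20, RoundingMode.HALF_UP);

    System.out.println(avgDouble);   // 3.5
    System.out.println(avgDec);      // 3.83333333333333333333
  }
}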
Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ @@ -1074,7 +1075,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 132000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) Reducer 3 Execution mode: vectorized, llap @@ -1092,13 +1093,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] - Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 132000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 500 Data size: 290000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 132000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out b/ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index 8d9ffb8410..c0cb6c0041 100644 --- a/ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -79,18 +79,18 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 100 Data size: 18816 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(gpa) + aggregations: sum(gpa), count(gpa) Group By Vectorization: - aggregators: VectorUDAFAvgDouble(col 2:double) -> struct + aggregators: VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCount(col 2:double) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:string, col 1:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] keys: name (type: string), age (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 100 Data size: 18816 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) @@ -102,9 +102,9 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0] - valueColumnNums: [2] + valueColumnNums: [2, 3] Statistics: Num rows: 100 Data size: 18816 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: no inputs Map Vectorization: @@ -132,20 +132,20 @@ STAGE PLANS: vectorized: false Reduce Operator Tree: Group By Operator - aggregations: 
avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1, _col2 + expressions: _col1 (type: int), _col0 (type: string), _col2 (type: double), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col0: int, _col1: string, _col2: double + output shape: _col0: int, _col1: string, _col2: double, _col3: bigint type: WINDOWING Windowing table definition input alias: ptf_1 @@ -162,7 +162,7 @@ STAGE PLANS: window frame: ROWS PRECEDING(MAX)~FOLLOWING(MAX) Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: double), sum_window_0 (type: bigint) + expressions: _col0 (type: int), _col1 (type: string), (_col2 / _col3) (type: double), sum_window_0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 50 Data size: 9408 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index 9eaf293e65..8fb07523ef 100644 --- a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -104,25 +104,26 @@ STAGE PLANS: predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble + expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4, 5] + projectedOutputColumnNums: [2, 5, 1, 4, 0, 13, 18, 16, 20] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 17:double) -> 18:double, CastLongToDouble(col 1:smallint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 17:double, CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double Statistics: Num 
rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> bigint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -130,9 +131,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: bigint), _col10 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -149,26 +150,49 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - 
scratchColumnTypeNames: [double, decimal(22,3), decimal(13,3)] + scratchColumnTypeNames: [double, decimal(22,3), decimal(13,3), double, double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:double, VALUE._col5:double, VALUE._col6:double, VALUE._col7:bigint, VALUE._col8:double, VALUE._col9:bigint, VALUE._col10:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), min(VALUE._col10) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFMinLong(col 10:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (_col0 + -3728.0D) (type: double), (- (_col0 + -3728.0D)) (type: double), (- (- (_col0 + -3728.0D))) (type: double), ((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) * (- (- (_col0 + -3728.0D)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0D)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0D)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175D - _col4) (type: double), (- (10.175D - _col4)) (type: double), ((- _col2) / -563.0D) (type: double), _col6 (type: double), (- ((- _col2) / -563.0D)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0D)) (type: double), (- (_col0 / _col1)) (type: 
double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) + -3728.0D) (type: double), (- ((_col0 / _col1) + -3728.0D)) (type: double), (- (- ((_col0 / _col1) + -3728.0D))) (type: double), ((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) (type: double), _col2 (type: double), (- (_col0 / _col1)) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) (type: double), (((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) * (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), (power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), ((power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) * power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), (_col8 / _col9) (type: double), (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), (- (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END))) (type: double), ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), (- ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), ((_col0 / _col1) / _col2) (type: double), _col10 (type: tinyint), _col7 (type: bigint), (UDFToDouble(_col10) / ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), (- ((_col0 / _col1) / _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 13, 12, 15, 14, 2, 17, 16, 19, 18, 24, 25, 27, 26, 20, 30, 34, 31, 37, 41, 42, 10, 7, 44, 38] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double, DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 15:double) -> 14:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 16:double) -> 14:double) -> 16:double, DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 
1:bigint) -> 14:double) -> 17:double) -> 14:double, DoubleColUnaryMinus(col 16:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 16:double) -> 17:double, FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 18:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColAddDoubleScalar(col 18:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 19:double) -> 18:double) -> 19:double, DoubleColAddDoubleScalar(col 18:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 20:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleColUnaryMinus(col 20:double)(children: DoubleColAddDoubleScalar(col 19:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 19:double) -> 20:double) -> 19:double) -> 20:double) -> 19:double, FuncPowerDoubleToDouble(col 20:double)(children: DoubleColDivideLongColumn(col 18:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 20:double)(children: DoubleColDivideLongColumn(col 18:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 20:double) -> 18:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 20:double) -> 18:double, DoubleColUnaryMinus(col 20:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 24:double) -> 20:double) -> 24:double) -> 20:double) -> 24:double, DoubleColSubtractDoubleColumn(col 20:double, col 26:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColAddDoubleScalar(col 25:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 25:double) -> 26:double) -> 25:double) -> 26:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 26:double, col 20:double)(children: DoubleColSubtractDoubleColumn(col 20:double, col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 
4:double) -> 20:double) -> 26:double) -> 20:double) -> 26:double) -> 20:double, DoubleColUnaryMinus(col 26:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColAddDoubleScalar(col 26:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 26:double) -> 27:double) -> 26:double) -> 27:double) -> 26:double, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 27:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 27:double) -> 20:double) -> 27:double) -> 20:double) -> 27:double, DoubleColDivideLongColumn(col 20:double, col 29:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 26:double) -> 20:double, IfExprNullCondExpr(col 23:boolean, null, col 28:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 28:bigint) -> 29:bigint) -> 26:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 20:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 29:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 29:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 34:double)(children: DoubleColDivideLongColumn(col 31:double, col 36:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 34:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 31:double) -> 34:double) -> 31:double, IfExprNullCondExpr(col 33:boolean, null, col 35:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 35:bigint) -> 36:bigint) -> 34:double) -> 31:double) -> 34:double, DoubleColDivideDoubleScalar(col 37:double, val -563.0)(children: DoubleColUnaryMinus(col 31:double)(children: FuncPowerDoubleToDouble(col 37:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 37:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 31:double) -> 37:double) -> 31:double) -> 37:double) -> 31:double) -> 37:double) -> 31:double, FuncPowerDoubleToDouble(col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 40:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 37:double) -> 38:double) -> 37:double, IfExprNullCondExpr(col 36:boolean, null, col 39:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) 
-> 36:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 39:bigint) -> 40:bigint) -> 38:double) -> 37:double, DoubleColUnaryMinus(col 38:double)(children: DoubleColDivideDoubleScalar(col 41:double, val -563.0)(children: DoubleColUnaryMinus(col 38:double)(children: FuncPowerDoubleToDouble(col 41:double)(children: DoubleColDivideLongColumn(col 38:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 41:double)(children: DoubleColDivideLongColumn(col 38:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double, DoubleColDivideDoubleColumn(col 38:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 38:double) -> 42:double, DoubleColDivideDoubleColumn(col 38:double, col 43:double)(children: CastLongToDouble(col 10:tinyint) -> 38:double, DoubleColDivideDoubleScalar(col 44:double, val -563.0)(children: DoubleColUnaryMinus(col 43:double)(children: FuncPowerDoubleToDouble(col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double, DoubleColUnaryMinus(col 43:double)(children: DoubleColDivideDoubleColumn(col 38:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 38:double) -> 43:double) -> 38:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -287,25 +311,26 @@ STAGE PLANS: predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble + expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4, 5] + projectedOutputColumnNums: [2, 5, 1, 4, 0, 18, 21, 22, 25] + selectExpressions: CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double, CastLongToDouble(col 1:smallint) -> 
22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 1:smallint) -> 23:double, CastLongToDouble(col 1:smallint) -> 24:double) -> 25:double Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> bigint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 25:double) -> double, VectorUDAFSumDouble(col 22:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -313,9 +338,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: bigint), _col10 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -332,26 +357,49 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, 
cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, double, double, decimal(22,3), decimal(13,3)] + scratchColumnTypeNames: [double, double, double, decimal(22,3), decimal(13,3), double, double, double, double, double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:double, VALUE._col5:double, VALUE._col6:double, VALUE._col7:bigint, VALUE._col8:double, VALUE._col9:bigint, VALUE._col10:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), min(VALUE._col10) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFMinLong(col 10:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (_col0 + -3728.0D) (type: double), (- (_col0 + -3728.0D)) (type: double), (- (- (_col0 + -3728.0D))) (type: double), ((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) * (- (- (_col0 + -3728.0D)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0D)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0D)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175D - _col4) (type: double), (- (10.175D - _col4)) (type: double), ((- _col2) / -563.0D) 
(type: double), _col6 (type: double), (- ((- _col2) / -563.0D)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0D)) (type: double), (- (_col0 / _col1)) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) + -3728.0D) (type: double), (- ((_col0 / _col1) + -3728.0D)) (type: double), (- (- ((_col0 / _col1) + -3728.0D))) (type: double), ((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) (type: double), _col2 (type: double), (- (_col0 / _col1)) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) (type: double), (((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) * (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), (power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), ((power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) * power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), (_col8 / _col9) (type: double), (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), (- (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END))) (type: double), ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), (- ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), ((_col0 / _col1) / _col2) (type: double), _col10 (type: tinyint), _col7 (type: bigint), (UDFToDouble(_col10) / ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), (- ((_col0 / _col1) / _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 13, 16, 20, 27, 2, 29, 34, 46, 54, 60, 70, 86, 93, 94, 102, 111, 118, 126, 134, 136, 10, 7, 145, 148] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double, DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 19:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColAddDoubleScalar(col 17:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 17:double) -> 18:double) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 24:double, col 26:double)(children: DoubleColUnaryMinus(col 23:double)(children: DoubleColUnaryMinus(col 22:double)(children: 
DoubleColAddDoubleScalar(col 21:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 22:double) -> 23:double) -> 24:double, DoubleColAddDoubleScalar(col 25:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 25:double) -> 26:double) -> 27:double, DoubleColUnaryMinus(col 28:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 28:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DoubleColMultiplyDoubleColumn(col 41:double, col 45:double)(children: DoubleColMultiplyDoubleColumn(col 38:double, col 40:double)(children: DoubleColUnaryMinus(col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColAddDoubleScalar(col 35:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 35:double) -> 36:double) -> 37:double) -> 38:double, DoubleColAddDoubleScalar(col 39:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 39:double) -> 40:double) -> 41:double, DoubleColUnaryMinus(col 44:double)(children: DoubleColUnaryMinus(col 43:double)(children: DoubleColAddDoubleScalar(col 42:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 42:double) -> 43:double) -> 44:double) -> 45:double) -> 46:double, FuncPowerDoubleToDouble(col 53:double)(children: DoubleColDivideLongColumn(col 49:double, col 52:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 48:double)(children: DoubleColDivideLongColumn(col 47:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 47:double) -> 48:double) -> 49:double, IfExprNullCondExpr(col 50:boolean, null, col 51:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 50:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 51:bigint) -> 52:bigint) -> 53:double) -> 54:double, DoubleColUnaryMinus(col 59:double)(children: FuncPowerDoubleToDouble(col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 56:double)(children: DoubleColDivideLongColumn(col 55:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 55:double) -> 56:double) -> 57:double) -> 58:double) -> 59:double) -> 60:double, DoubleColSubtractDoubleColumn(col 65:double, col 69:double)(children: FuncPowerDoubleToDouble(col 64:double)(children: DoubleColDivideLongColumn(col 63:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 62:double)(children: DoubleColDivideLongColumn(col 61:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 61:double) -> 62:double) -> 63:double) -> 64:double) -> 65:double, DoubleColUnaryMinus(col 68:double)(children: DoubleColUnaryMinus(col 67:double)(children: DoubleColAddDoubleScalar(col 66:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 66:double) -> 67:double) -> 68:double) -> 69:double) -> 70:double, DoubleColMultiplyDoubleColumn(col 80:double, col 85:double)(children: DoubleColSubtractDoubleColumn(col 75:double, col 79:double)(children: 
FuncPowerDoubleToDouble(col 74:double)(children: DoubleColDivideLongColumn(col 73:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 72:double)(children: DoubleColDivideLongColumn(col 71:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 71:double) -> 72:double) -> 73:double) -> 74:double) -> 75:double, DoubleColUnaryMinus(col 78:double)(children: DoubleColUnaryMinus(col 77:double)(children: DoubleColAddDoubleScalar(col 76:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 76:double) -> 77:double) -> 78:double) -> 79:double) -> 80:double, FuncPowerDoubleToDouble(col 84:double)(children: DoubleColDivideLongColumn(col 83:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 82:double)(children: DoubleColDivideLongColumn(col 81:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 81:double) -> 82:double) -> 83:double) -> 84:double) -> 85:double) -> 86:double, DoubleColDivideLongColumn(col 89:double, col 92:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 88:double)(children: DoubleColDivideLongColumn(col 87:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 87:double) -> 88:double) -> 89:double, IfExprNullCondExpr(col 90:boolean, null, col 91:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 90:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 91:bigint) -> 92:bigint) -> 93:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 94:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 101:double)(children: DoubleColDivideLongColumn(col 97:double, col 100:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 96:double)(children: DoubleColDivideLongColumn(col 95:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 95:double) -> 96:double) -> 97:double, IfExprNullCondExpr(col 98:boolean, null, col 99:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 98:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 99:bigint) -> 100:bigint) -> 101:double) -> 102:double, DoubleColUnaryMinus(col 110:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 109:double)(children: DoubleColDivideLongColumn(col 105:double, col 108:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 104:double)(children: DoubleColDivideLongColumn(col 103:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 103:double) -> 104:double) -> 105:double, IfExprNullCondExpr(col 106:boolean, null, col 107:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 106:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 107:bigint) -> 108:bigint) -> 109:double) -> 110:double) -> 111:double, DoubleColDivideDoubleScalar(col 117:double, val -563.0)(children: DoubleColUnaryMinus(col 116:double)(children: FuncPowerDoubleToDouble(col 115:double)(children: DoubleColDivideLongColumn(col 114:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 113:double)(children: DoubleColDivideLongColumn(col 112:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 112:double) -> 113:double) -> 114:double) -> 115:double) -> 116:double) -> 117:double) -> 118:double, FuncPowerDoubleToDouble(col 125:double)(children: DoubleColDivideLongColumn(col 121:double, col 124:bigint)(children: 
DoubleColSubtractDoubleColumn(col 3:double, col 120:double)(children: DoubleColDivideLongColumn(col 119:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 119:double) -> 120:double) -> 121:double, IfExprNullCondExpr(col 122:boolean, null, col 123:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 122:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 123:bigint) -> 124:bigint) -> 125:double) -> 126:double, DoubleColUnaryMinus(col 133:double)(children: DoubleColDivideDoubleScalar(col 132:double, val -563.0)(children: DoubleColUnaryMinus(col 131:double)(children: FuncPowerDoubleToDouble(col 130:double)(children: DoubleColDivideLongColumn(col 129:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 128:double)(children: DoubleColDivideLongColumn(col 127:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 127:double) -> 128:double) -> 129:double) -> 130:double) -> 131:double) -> 132:double) -> 133:double) -> 134:double, DoubleColDivideDoubleColumn(col 135:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 135:double) -> 136:double, DoubleColDivideDoubleColumn(col 137:double, col 144:double)(children: CastLongToDouble(col 10:tinyint) -> 137:double, DoubleColDivideDoubleScalar(col 143:double, val -563.0)(children: DoubleColUnaryMinus(col 142:double)(children: FuncPowerDoubleToDouble(col 141:double)(children: DoubleColDivideLongColumn(col 140:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 139:double)(children: DoubleColDivideLongColumn(col 138:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 138:double) -> 139:double) -> 140:double) -> 141:double) -> 142:double) -> 143:double) -> 144:double) -> 145:double, DoubleColUnaryMinus(col 147:double)(children: DoubleColDivideDoubleColumn(col 146:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 146:double) -> 147:double) -> 148:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out index a82126515c..8e73eb0278 100644 --- a/ql/src/test/results/clientpositive/llap/vector_windowing.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_windowing.q.out @@ -2883,9 +2883,9 @@ STAGE PLANS: window function: GenericUDAFAverageEvaluatorDouble window frame: RANGE PRECEDING(MAX)~CURRENT window function definition - alias: stddev_window_7 + alias: stddev_pop_window_7 arguments: _col5 - name: stddev + name: stddev_pop window function: GenericUDAFStdEvaluator window frame: RANGE PRECEDING(MAX)~CURRENT window function definition @@ -2902,15 +2902,15 @@ STAGE PLANS: window frame: RANGE PRECEDING(MAX)~CURRENT Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), 
stddev_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int) - outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, percent_rank_window_3, ntile_window_4, count_window_5, avg_window_6, stddev_window_7, first_value_window_8, last_value_window_9, _col1, _col2, _col5 + expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_pop_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int) + outputColumnNames: rank_window_0, dense_rank_window_1, cume_dist_window_2, percent_rank_window_3, ntile_window_4, count_window_5, avg_window_6, stddev_pop_window_7, first_value_window_8, last_value_window_9, _col1, _col2, _col5 Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col5 (type: int) + value expressions: rank_window_0 (type: int), dense_rank_window_1 (type: int), cume_dist_window_2 (type: double), percent_rank_window_3 (type: double), ntile_window_4 (type: int), count_window_5 (type: bigint), avg_window_6 (type: double), stddev_pop_window_7 (type: double), first_value_window_8 (type: int), last_value_window_9 (type: int), _col5 (type: int) Reducer 3 Execution mode: llap Reduce Vectorization: @@ -4086,7 +4086,7 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: PTF Output Columns expression for PTF operator: Data type array of column collect_set_window_2 not supported + notVectorizedReason: PTF Output Columns expression for PTF operator: Data type array of column collect_set_window_1 not supported vectorized: false Reduce Operator Tree: Select Operator @@ -4107,44 +4107,38 @@ STAGE PLANS: raw input shape: window functions: window function definition - alias: stddev_window_0 - arguments: _col7 - name: stddev - window function: GenericUDAFStdEvaluator - window frame: ROWS PRECEDING(2)~FOLLOWING(2) - window function definition - alias: stddev_pop_window_1 + alias: stddev_pop_window_0 arguments: _col7 name: stddev_pop window function: GenericUDAFStdEvaluator window frame: ROWS PRECEDING(2)~FOLLOWING(2) window function definition - alias: collect_set_window_2 + alias: collect_set_window_1 arguments: _col5 name: collect_set window function: GenericUDAFMkCollectionEvaluator window frame: ROWS PRECEDING(2)~FOLLOWING(2) window function definition - alias: variance_window_3 + alias: var_pop_window_2 arguments: _col7 - name: variance + name: var_pop window function: GenericUDAFVarianceEvaluator window frame: ROWS PRECEDING(2)~FOLLOWING(2) window function definition - 
alias: corr_window_4 + alias: corr_window_3 arguments: _col5, _col7 name: corr window function: GenericUDAFCorrelationEvaluator window frame: ROWS PRECEDING(2)~FOLLOWING(2) window function definition - alias: covar_pop_window_5 + alias: covar_pop_window_4 arguments: _col5, _col7 name: covar_pop window function: GenericUDAFCovarianceEvaluator window frame: ROWS PRECEDING(2)~FOLLOWING(2) Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), stddev_window_0 (type: double), stddev_pop_window_1 (type: double), collect_set_window_2 (type: array), variance_window_3 (type: double), round(corr_window_4, 5) (type: double), covar_pop_window_5 (type: double) + expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), stddev_pop_window_0 (type: double), stddev_pop_window_0 (type: double), collect_set_window_1 (type: array), var_pop_window_2 (type: double), round(corr_window_3, 5) (type: double), covar_pop_window_4 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 9958 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index 1a846ab595..b2db5a5a9d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -408,18 +408,18 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint + expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -432,23 +432,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY 
operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -459,7 +464,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -504,7 +509,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +-4.344925324321378 1158.3003004768175 1158.3003004768175 1158.426587033782 34.03381113652741 34.03381113652741 34.03381113652741 34.03566639620535 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), @@ -915,18 +920,18 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: 
cbigint + expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -939,23 +944,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + 
Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -966,7 +976,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1011,7 +1021,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 +-1.8515862077935246E8 2.07689300543070106E18 2.07689300543070106E18 2.07711944383076992E18 1.441142951074147E9 1.441142951074147E9 1.441142951074147E9 1.441221511021387E9 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), @@ -1422,18 +1432,18 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat + expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 36696 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1446,23 +1456,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : 
"variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1473,7 +1488,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1518,7 +1533,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +-4.303895780321011 1163.8972588605056 1163.8972588605056 1164.0241556397098 34.11593848717203 34.11593848717203 34.11593848717203 34.11779822379677 WARNING: Comparing a bigint and a double may result in a loss of precision. 
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), @@ -1606,25 +1621,26 @@ STAGE PLANS: predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cfloat + expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 4] + projectedOutputColumnNums: [3, 4, 0, 14, 17] + selectExpressions: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 16:double) -> 17:double Statistics: Num rows: 12288 Data size: 1210980 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -1632,9 +1648,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: 
double), _col4 (type: bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1651,26 +1667,49 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double] + scratchColumnTypeNames: [decimal(13,3), double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), min(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinLong(col 6:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0D + _col0) (type: double), _col1 (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) + (-6432.0D + _col0)) (type: double), _col2 (type: double), (- (-6432.0D + _col0)) (type: double), (-6432.0D + (- (-6432.0D + _col0))) (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) / (- (-6432.0D + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0D + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + expressions: (_col0 / _col1) (type: double), (- (_col0 / _col1)) (type: double), (-6432.0D + (_col0 / _col1)) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) + (-6432.0D + (_col0 / _col1))) (type: double), 
((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), (-6432.0D + (- (-6432.0D + (_col0 / _col1)))) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) / (- (-6432.0D + (_col0 / _col1)))) (type: double), _col4 (type: bigint), _col5 (type: double), (((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) % power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5)) (type: double), (- ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), ((- (-6432.0D + (_col0 / _col1))) * (- (_col0 / _col1))) (type: double), _col6 (type: tinyint), (- _col6) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [7, 9, 10, 8, 11, 13, 14, 12, 19, 18, 22, 4, 5, 25, 20, 28, 6, 27] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 8:double) -> 9:double, DoubleScalarAddDoubleColumn(val -6432.0, col 8:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 8:double) -> 10:double, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 8:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 11:double)(children: DoubleColDivideLongColumn(col 8:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 8:double) -> 11:double) -> 8:double) -> 11:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 11:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double) -> 11:double, DoubleColAddDoubleColumn(col 12:double, col 14:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double) -> 12:double, DoubleScalarAddDoubleColumn(val -6432.0, col 13:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 13:double) -> 14:double) -> 13:double, DoubleColDivideLongColumn(col 12:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 12:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 12:double) -> 14:double) -> 12:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double, DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 18:double) -> 12:double, DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 19:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: 
DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 20:double) -> 18:double, DoubleColDivideDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 20:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 20:double) -> 21:double) -> 20:double, DoubleColUnaryMinus(col 22:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 21:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 22:double) -> 21:double) -> 22:double, DoubleColModuloDoubleColumn(col 21:double, col 20:double)(children: DoubleColDivideLongColumn(col 20:double, col 24:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double, IfExprNullCondExpr(col 17:boolean, null, col 23:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 23:bigint) -> 24:bigint) -> 21:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double, IfExprNullCondExpr(col 24:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 24:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 21:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 21:double, col 29:double)(children: DoubleColUnaryMinus(col 28:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 21:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 28:double) -> 21:double, DoubleColUnaryMinus(col 28:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 28:double) -> 29:double) -> 28:double, LongColUnaryMinus(col 6:tinyint) -> 27:tinyint Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1740,7 +1779,7 @@ WHERE (((cstring2 LIKE '%b%') POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593627E9 
3.875716535945533E8 0.0 2.0634715172019392E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0516820315185745E9 -2.0634715172019392E18 1.5020929380914048E17 -64 64 PREHOOK: query: EXPLAIN extended select count(*) from alltypesorc where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or diff --git a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out index dbee077cc9..c87926c0a6 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_1.q.out @@ -72,25 +72,26 @@ STAGE PLANS: predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean) Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, cint, cfloat, cdouble + expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5] + projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 12288 Data size: 330276 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -98,9 +99,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -117,26 +118,49 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:tinyint, VALUE._col5:int, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), max(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), count(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFMaxLong(col 4:tinyint) -> tinyint, VectorUDAFMaxLong(col 5:int) -> int, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFCountMerge(col 9:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 
1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (_col0 / -26.28D) (type: double), _col1 (type: double), (-1.389D + _col1) (type: double), (_col1 * (-1.389D + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389D + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175D % (- (_col1 * (-1.389D + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) / -26.28D) (type: double), _col3 (type: double), (-1.389D + _col3) (type: double), (_col3 * (-1.389D + _col3)) (type: double), _col4 (type: tinyint), (- (_col3 * (-1.389D + _col3))) (type: double), _col5 (type: int), (CAST( _col5 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), ((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END) (type: double), (10.175D % (- (_col3 * (-1.389D + _col3)))) (type: double), _col9 (type: bigint), (-563 % _col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 10, 3, 12, 14, 4, 13, 5, 17, 18, 22, 9, 21] + selectExpressions: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 11:double) -> 10:double) -> 11:double, DoubleColDivideDoubleScalar(col 12:double, val -26.28)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 12:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 12:double) -> 10:double) -> 12:double) -> 10:double, DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 3:double, col 13:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 13:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 13:double) -> 15:double) -> 13:double, DecimalColMultiplyDecimalScalar(col 16:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 5:int) -> 16:decimal(10,0)) -> 17:decimal(16,3), DoubleColDivideLongColumn(col 15:double, col 21:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 18:double)(children: DoubleColDivideLongColumn(col 15:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 15:double) -> 18:double) -> 15:double, IfExprNullCondExpr(col 19:boolean, null, col 20:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 20:bigint) -> 21:bigint) -> 18:double, DoubleScalarModuloDoubleColumn(val 10.175, col 15:double)(children: DoubleColUnaryMinus(col 22:double)(children: 
DoubleColMultiplyDoubleColumn(col 3:double, col 15:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 15:double) -> 22:double) -> 15:double) -> 22:double, LongScalarModuloLongColumn(val -563, col 5:int) -> 21:int Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -193,4 +217,4 @@ WHERE (((cdouble > ctinyint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1074.830257547229 -40.89917266161449 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620903E10 10.175 3745 -563 +1074.8302575472321 -40.899172661614614 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620917E10 10.175 3745 -563 diff --git a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out index c9faf5596d..0ead6c4c97 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -95,27 +95,28 @@ STAGE PLANS: predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean) Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) - outputColumnNames: cbigint, cdouble, cstring1, cboolean1 + expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 5, 6, 10] + projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) + aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: - aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop + aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, 
VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4] - keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 370 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -125,9 +126,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] - Statistics: Num rows: 1 Data size: 370 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) + valueColumnNums: [4, 5, 6, 7, 8, 9, 10] + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -144,29 +145,56 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:bigint, VALUE._col6:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: 
count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumLong(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 154 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double) + expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 
5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 20:double) -> 15:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 22:double, DecimalScalarAddDecimalColumn(val -5638.15, col 23:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 23:decimal(19,0)) -> 24:decimal(22,2), DoubleColDivideDoubleColumn(col 21:double, col 25:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 21:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 25:double) -> 26:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 25:double) -> 21:double) -> 25:double, DoubleColAddDoubleColumn(col 27:double, col 28:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 27:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 28:double) -> 21:double, FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double + Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ - Statistics: Num rows: 1 Data size: 338 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: 
double), _col18 (type: double), _col19 (type: double) Reducer 3 Execution mode: vectorized, llap diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out index 82982e68f1..d72c29858a 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -97,27 +97,28 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > 11.0D) and (UDFToDouble(ctimestamp2) <> 12.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 
(type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 2730 Data size: 510974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -127,9 +128,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] - Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + Statistics: Num rows: 2730 Data size: 510974 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -146,28 +147,55 @@ STAGE PLANS: includeColumns: [0, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(11,4)] + scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 15 + dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:double, VALUE._col7:bigint, VALUE._col8:float, VALUE._col9:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), 
max(VALUE._col8), min(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1365 Data size: 211860 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1365 Data size: 255540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: 
LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -423,27 +451,28 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > -1.388D) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 
<> 1))) (type: boolean) Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 5461 Data size: 901772 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 2730 Data size: 510974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 
(type: float), _col4 (type: string) sort order: +++++ @@ -452,8 +481,8 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + Statistics: Num rows: 2730 Data size: 510974 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -466,26 +495,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1365 Data size: 211860 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1365 Data size: 255540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) 
+ _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 
25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1365 Data size: 446640 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out index eaf51570ec..7ae99a3e5d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -97,28 +97,28 @@ STAGE PLANS: predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) Statistics: Num rows: 606 Data size: 105558 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 4, 6, 10, 5, 14] - selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double + projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] + selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: 
DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double Statistics: Num rows: 606 Data size: 105558 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 14:double) -> struct aggregation: stddev_samp, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 14:double) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 303 Data size: 137686 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 303 Data size: 52846 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -128,9 +128,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] - Statistics: Num rows: 303 Data size: 137686 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) + valueColumnNums: [5, 6, 7, 8, 9, 10, 11] + Statistics: Num rows: 303 Data size: 52846 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -147,28 +147,55 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, 
ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:float, VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDouble(col 8:float) -> float, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 151 Data size: 25224 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 151 Data size: 26432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175D) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) 
/ CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] + selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 13:double) -> 15:double) -> 13:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 15:float, DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleColUnaryMinus(col 8:float) -> 20:float, DoubleColDivideDoubleScalar(col 22:double, val 10.175)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 21:double) -> 22:double) -> 21:double, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleScalar(col 24:double, val 10.175)(children: DoubleColUnaryMinus(col 23:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 23:double) -> 24:double) -> 23:double) -> 24:double, DoubleScalarModuloDoubleColumn(val -1.389, col 23:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 23:double) -> 25:double) -> 23:double, IfExprNullCondExpr(col 18:boolean, null, col 
26:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 23:double) -> 25:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 23:double, DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColModuloDoubleScalar(col 30:double, val 10.175)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 27:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 30:double) -> 34:double Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] Statistics: Num rows: 151 Data size: 36700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 diff --git a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out index 088721296a..31363df200 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -93,27 +93,28 @@ STAGE PLANS: predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: 
tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5, 6, 8, 10] + projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 6144 Data size: 3293884 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -123,9 +124,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4, 5, 6] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] - Statistics: Num rows: 6144 Data size: 3293884 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) + valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] + Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -142,7 +143,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double, double] Reducer 2 Execution mode: llap Reduce Vectorization: @@ -151,13 +152,13 @@ STAGE PLANS: enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 3072 Data size: 541028 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 + Statistics: Num rows: 3072 Data size: 639332 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: 
double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) + expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out index dd2e5f0e33..59f2d1025d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -70,39 +70,40 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double 
className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2048 Data size: 303516 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] - Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + valueColumnNums: [3, 4, 5, 6] + Statistics: Num rows: 2048 Data size: 303516 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -119,27 +120,51 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), 
min(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 6:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:double, col 2:timestamp + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1024 Data size: 143566 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1024 Data size: 151758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] + selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_2.q.out b/ql/src/test/results/clientpositive/llap/vectorization_2.q.out index 96badf9b05..83833da81b 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_2.q.out @@ -76,25 +76,26 @@ STAGE PLANS: predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble + expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 3, 4, 5] + projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] + 
selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -102,9 +103,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -121,26 +122,49 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:bigint, VALUE._col7:tinyint, VALUE._col8:double, VALUE._col9:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (_col0 % -563.0D) (type: double), (_col0 + 762.0D) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0D) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) % -563.0D) (type: double), ((_col0 / _col1) + 762.0D) (type: double), _col2 (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), (- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) (type: double), (_col2 - (_col0 / _col1)) (type: double), _col6 (type: bigint), (- (_col2 - (_col0 / _col1))) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / _col5) - 762.0D) (type: double), _col7 (type: tinyint), ((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) (type: double), (_col8 / _col9) (type: double), (((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) - _col2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 12, 13, 2, 14, 11, 16, 6, 15, 17, 
7, 20, 18, 19] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 10:double, DoubleColModuloDoubleScalar(col 11:double, val -563.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double, DoubleColAddDoubleScalar(col 11:double, val 762.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 11:double) -> 14:double) -> 11:double) -> 14:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 15:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 11:double) -> 15:double) -> 11:double) -> 15:double) -> 11:double, DoubleColSubtractDoubleColumn(col 2:double, col 15:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 15:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 15:double) -> 17:double) -> 15:double, DoubleColSubtractDoubleScalar(col 18:double, val 762.0)(children: DoubleColDivideLongColumn(col 17:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, DoubleColAddDoubleColumn(col 18:double, col 19:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 18:double) -> 19:double) -> 18:double) -> 19:double) -> 18:double, CastLongToDouble(col 7:tinyint) -> 19:double) -> 20:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 18:double, DoubleColSubtractDoubleColumn(col 22:double, col 2:double)(children: DoubleColAddDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 19:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 21:double)(children: DoubleColDivideLongColumn(col 19:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 19:double) -> 21:double) -> 19:double) -> 21:double) -> 19:double, CastLongToDouble(col 7:tinyint) -> 21:double) -> 22:double) -> 19:double Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -201,4 +225,4 @@ WHERE (((ctimestamp1 < ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 
1.49936299222378778E18 -1.49936299222378778E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378701E18 -64 -1.49936299222378778E18 -5650.1297631138395 -1.49936299222378496E18 +-5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378906E18 -1.49936299222378906E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378829E18 -64 -1.49936299222378906E18 -5650.1297631138395 -1.49936299222378624E18 diff --git a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out index b472c2d796..3c502cd1c6 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -81,25 +81,26 @@ STAGE PLANS: predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, csmallint, cint, cfloat + expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4] + projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFVarDouble(col 4:float) -> struct 
aggregation: stddev_samp, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 19:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -107,9 +108,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -126,26 +127,49 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)] + scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3), double, double, double, double, double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: 
false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:double, VALUE._col10:bigint, VALUE._col11:bigint, VALUE._col12:double, VALUE._col13:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumLong(col 10:bigint) -> bigint, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 13:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (_col0 - 10.175D) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175D)) (type: double), (- _col1) (type: double), (_col0 % 79.553D) (type: double), (- (_col0 * (_col0 - 10.175D))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175D))) / (_col0 - 10.175D)) (type: double), (- (_col0 - 10.175D)) (type: double), _col4 (type: double), (-3728.0D - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double) + expressions: power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5)) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE 
((_col2 - 1)) END), 0.5) % 79.553D) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5) (type: double), (- power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), _col9 (type: double), ((- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) / (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (_col10 / _col11) (type: double), (-3728.0D - power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), power(((_col12 - ((_col13 * _col13) / _col11)) / _col11), 0.5) (type: double), ((_col10 / _col11) / power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [14, 19, 15, 23, 26, 29, 22, 32, 40, 9, 43, 35, 46, 54, 53, 59] + selectExpressions: FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColSubtractDoubleScalar(col 15:double, val 10.175)(children: FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 21:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 15:double) -> 19:double) -> 15:double, IfExprNullCondExpr(col 18:boolean, null, col 20:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 20:bigint) -> 21:bigint) -> 19:double) -> 15:double) -> 19:double, FuncPowerDoubleToDouble(col 22:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 22:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 15:double) -> 22:double) -> 15:double) -> 22:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 22:double, col 26:double)(children: 
FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 23:double) -> 22:double, IfExprNullCondExpr(col 21:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 22:double, DoubleColSubtractDoubleScalar(col 23:double, val 10.175)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 23:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 26:double)(children: DoubleColDivideLongColumn(col 23:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 23:double) -> 26:double) -> 23:double, IfExprNullCondExpr(col 25:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 23:double) -> 26:double) -> 23:double, DoubleColUnaryMinus(col 22:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 22:double) -> 26:double) -> 22:double) -> 26:double) -> 22:double) -> 26:double, DoubleColModuloDoubleScalar(col 22:double, val 79.553)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 22:double, col 31:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 29:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 29:double) -> 22:double, IfExprNullCondExpr(col 28:boolean, null, col 30:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 30:bigint) -> 31:bigint) -> 29:double) -> 22:double) -> 29:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColMultiplyDoubleColumn(col 22:double, col 35:double)(children: FuncPowerDoubleToDouble(col 32:double)(children: DoubleColDivideLongColumn(col 22:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 32:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 32:double) -> 22:double, IfExprNullCondExpr(col 31:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 31:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 32:double) -> 22:double, DoubleColSubtractDoubleScalar(col 32:double, val 10.175)(children: FuncPowerDoubleToDouble(col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 32:double) -> 35:double) -> 32:double, IfExprNullCondExpr(col 34:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 
1) -> 34:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double) -> 32:double) -> 35:double) -> 32:double) -> 22:double, FuncPowerDoubleToDouble(col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 39:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 32:double) -> 35:double) -> 32:double, IfExprNullCondExpr(col 37:boolean, null, col 38:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 38:bigint) -> 39:bigint) -> 35:double) -> 32:double, DoubleColUnaryMinus(col 35:double)(children: FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 35:double, col 42:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 40:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 40:double) -> 35:double, IfExprNullCondExpr(col 39:boolean, null, col 41:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 39:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 41:bigint) -> 42:bigint) -> 40:double) -> 35:double) -> 40:double, DoubleColDivideDoubleColumn(col 35:double, col 46:double)(children: DoubleColUnaryMinus(col 43:double)(children: DoubleColMultiplyDoubleColumn(col 35:double, col 46:double)(children: FuncPowerDoubleToDouble(col 43:double)(children: DoubleColDivideLongColumn(col 35:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 43:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 43:double) -> 35:double, IfExprNullCondExpr(col 42:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 42:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 43:double) -> 35:double, DoubleColSubtractDoubleScalar(col 43:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 43:double) -> 46:double) -> 43:double, IfExprNullCondExpr(col 45:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 45:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 46:double) -> 43:double) -> 46:double) -> 43:double) -> 35:double, DoubleColSubtractDoubleScalar(col 43:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 50:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 43:double) -> 46:double) -> 43:double, IfExprNullCondExpr(col 48:boolean, null, col 49:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 48:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 49:bigint) -> 50:bigint) -> 46:double) -> 43:double) -> 46:double) -> 43:double, DoubleColUnaryMinus(col 46:double)(children: 
DoubleColSubtractDoubleScalar(col 35:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 35:double, col 52:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 46:double) -> 35:double, IfExprNullCondExpr(col 50:boolean, null, col 51:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 50:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 51:bigint) -> 52:bigint) -> 46:double) -> 35:double) -> 46:double) -> 35:double, LongColDivideLongColumn(col 10:bigint, col 11:bigint) -> 46:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 53:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 53:double, col 56:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 54:double)(children: DoubleColDivideLongColumn(col 53:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 53:double) -> 54:double) -> 53:double, IfExprNullCondExpr(col 52:boolean, null, col 55:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 52:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 55:bigint) -> 56:bigint) -> 54:double) -> 53:double) -> 54:double, FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 57:double)(children: DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 53:double) -> 57:double) -> 53:double) -> 57:double) -> 53:double, DoubleColDivideDoubleColumn(col 57:double, col 58:double)(children: LongColDivideLongColumn(col 10:bigint, col 11:bigint) -> 57:double, FuncPowerDoubleToDouble(col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 61:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 58:double) -> 59:double) -> 58:double, IfExprNullCondExpr(col 56:boolean, null, col 60:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 56:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 60:bigint) -> 61:bigint) -> 59:double) -> 58:double) -> 59:double Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -211,4 +235,4 @@ WHERE (((cint <= cfloat) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.34690095515641 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL +0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.3469009551564 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL diff --git a/ql/src/test/results/clientpositive/llap/vectorization_4.q.out b/ql/src/test/results/clientpositive/llap/vectorization_4.q.out index 122f3fbb3f..a8cfa48425 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_4.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vectorization_4.q.out @@ -76,17 +76,18 @@ STAGE PLANS: predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean) Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double) - outputColumnNames: ctinyint, cint, cdouble + expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 5] + projectedOutputColumnNums: [2, 5, 0, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 12288 Data size: 256884 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint) + aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -94,7 +95,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -103,8 +104,8 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -121,26 +122,49 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 5] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:bigint, VALUE._col4:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), min(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 4:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2)) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2))) (type: double) + expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (- power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5)) (type: double), (_col2 / _col3) (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3))) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 6, 7, 9, 8, 11, 15, 14, 13, 18, 4, 4, 19] + selectExpressions: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 0:bigint) -> 6:bigint, FuncPowerDoubleToDouble(col 8:double)(children: DoubleColDivideLongColumn(col 7:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 8:double)(children: DoubleColDivideLongColumn(col 7:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 7:double) -> 8:double) -> 
7:double) -> 8:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: FuncPowerDoubleToDouble(col 9:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 8:double) -> 9:double) -> 8:double) -> 9:double) -> 8:double) -> 9:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 8:double, LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 11:bigint, DoubleColDivideDoubleColumn(col 13:double, col 14:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 13:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 14:double) -> 15:double, DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 13:double) -> 14:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideDoubleColumn(col 13:double, col 16:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 13:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 16:double) -> 17:double) -> 13:double, LongColSubtractLongColumn(col 10:bigint, col 12:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) -> 10:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 12:bigint) -> 18:bigint, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 4:tinyint) -> 16:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 17:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 19:double) -> 20:double) -> 17:double) -> 19:double Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -201,4 +225,4 @@ WHERE (((csmallint >= cint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --493101012745 277615870175435 -493101016473 136727.7868296355 -136727.7868296355 2298.5515807767374 0 0.0 1.8694487691330246E10 -0.0 -278108971191908 -64 -64 0.0 +-493101012745 277615870175435 -493101016473 136727.78682963562 -136727.78682963562 2298.5515807767374 0 0.0 1.8694487691330276E10 -0.0 -278108971191908 -64 -64 0.0 diff --git a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out index dd2e5f0e33..59f2d1025d 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -70,39 +70,40 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 769522 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 2048 Data size: 303516 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] - Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + valueColumnNums: [3, 4, 5, 6] + Statistics: Num rows: 2048 Data size: 303516 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map 
Vectorization: @@ -119,27 +120,51 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 6:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:double, col 2:timestamp + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1024 Data size: 143566 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1024 Data size: 151758 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), 
power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] + selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double Statistics: Num rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num 
rows: 1024 Data size: 307406 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out index ad6bae763a..8353306834 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_input_format_excludes.q.out @@ -173,21 +173,21 @@ STAGE PLANS: alias: alltypes_parquet Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -200,26 +200,31 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 
(type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -249,36 +254,36 @@ POSTHOOK: query: select ctinyint, POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_parquet #### A masked pattern was here #### --1 626923679 -15441 36 -1.0486250072717667 8786.246963933321 +-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323 -10 626923679 -15384 28 -10.0 8850.451610567823 --11 626923679 -15659 32 -11.0 10453.738567408038 --12 626923679 -16373 22 -12.0 10173.15707541171 --13 626923679 -15446 30 -13.0 8907.942987576693 +-11 626923679 -15659 32 -11.0 10453.73856740804 +-12 626923679 -16373 22 -12.0 10173.157075411711 +-13 626923679 -15446 30 -13.0 8907.942987576691 -14 626923679 -13884 22 -14.0 10125.818731386042 --15 626923679 -16036 24 -15.0 9450.506254395024 +-15 626923679 -16036 24 -15.0 9450.506254395026 -16 626923679 -15154 21 -16.0 8884.207393686478 -17 626923679 -15922 19 -17.0 9944.104273894172 -18 626923679 -14863 24 -18.0 9638.430684071413 -19 626923679 -15935 25 -19.0 9967.22240685782 -2 626923679 -16277 20 -2.0 10800.090249507177 --20 626923679 -16126 24 -20.0 9868.92268080106 +-20 626923679 -16126 24 -20.0 9868.922680801063 -21 626923679 -16017 27 -21.0 9480.349236669877 -22 626923679 -14701 22 -22.0 8809.230165774987 -23 626923679 -16355 36 -23.345263230173213 9401.831290253447 -24 626923679 -16311 26 -24.0 9386.736402961187 --25 626923679 -15862 24 -25.0 9778.256724727018 --26 626923679 -15686 15 -26.0 10874.523900405318 +-25 626923679 -15862 24 -25.0 9778.25672472702 +-26 626923679 -15686 15 -26.0 10874.52390040532 -27 626923679 -14984 20 -27.0 8465.29660255097 -28 626923679 -15813 20 -28.0 9616.869413270924 --29 626923679 -14747 26 -29.0 9052.945656011721 +-29 626923679 -14747 26 -29.0 9052.945656011723 -3 626923679 -13632 16 -3.0 8836.215573422822 --30 626923679 -14863 23 -30.0 9193.941914019653 +-30 626923679 -14863 23 -30.0 9193.941914019651 -31 626923679 -15915 22 -31.0 9187.596784112568 --32 626923679 -15866 25 -32.0 9535.546396775915 +-32 626923679 -15866 25 -32.0 9535.546396775917 -33 626923679 -12779 21 -33.0 8854.331159704514 -34 626923679 -15450 29 -34.0 8708.243526705026 -35 626923679 -16059 23 -35.0 10136.580492864763 --36 626923679 -16208 23 -36.0 8773.547684436919 +-36 
626923679 -16208 23 -36.0 8773.54768443692 -37 626923679 -14780 17 -37.0 10368.905538788269 -38 626923679 -14914 28 -38.0 8767.375358291503 -39 626923679 -15612 19 -39.0 9765.551806305297 @@ -291,37 +296,37 @@ POSTHOOK: Input: default@alltypes_parquet -45 626923679 -15027 21 -45.0 8567.489593562543 -46 626923679 -12427 21 -46.0 9182.943188188632 -47 626923679 -16096 19 -47.0 9011.009178780589 --48 626923679 -15462 26 -48.0 9913.883371354861 +-48 626923679 -15462 26 -48.0 9913.883371354863 -49 626923679 -14831 23 -49.0 9894.429191738676 -5 626923679 -15780 24 -5.0 10599.227726422314 -50 626923679 -14320 27 -50.0 8548.827748002343 -51 1073680599 -15734 1028 -51.0 9531.569305177045 -52 626923679 -16369 30 -52.0 8625.06871423408 --53 626923679 -15445 19 -53.0 9387.739325499799 --54 626923679 -14815 23 -54.0 9614.154026896626 +-53 626923679 -15445 19 -53.0 9387.7393254998 +-54 626923679 -14815 23 -54.0 9614.154026896624 -55 626923679 -13381 26 -55.0 9157.562103946742 --56 626923679 -11999 33 -56.0 9490.842152672341 --57 626923679 -14893 32 -57.0 8572.083461570477 --58 626923679 -15169 20 -58.0 9549.096672008198 --59 626923679 -15789 28 -59.0 9829.790704244733 +-56 626923679 -11999 33 -56.0 9490.84215267234 +-57 626923679 -14893 32 -57.0 8572.083461570479 +-58 626923679 -15169 20 -58.0 9549.096672008196 +-59 626923679 -15789 28 -59.0 9829.790704244735 -6 626923679 -15980 30 -6.0 10262.829252317424 -60 626923679 -15792 24 -60.0 9892.656196775464 --61 626923679 -15142 22 -61.0 9357.236187870849 --62 626923679 -15992 24 -62.0 9004.593091474135 +-61 626923679 -15142 22 -61.0 9357.23618787085 +-62 626923679 -15992 24 -62.0 9004.593091474137 -63 626923679 -12516 16 -63.0 9263.605837223322 -64 626923679 -15920 21 -64.0 9254.456539277186 --7 626923679 -14584 23 -7.0 9946.605446407746 --8 626923679 -14678 18 -8.0 9976.831992670684 +-7 626923679 -14584 23 -7.0 9946.605446407748 +-8 626923679 -14678 18 -8.0 9976.831992670686 -9 626923679 -15329 31 -9.0 8999.391457373968 0 626923679 -14254 24 0.0 10057.5018088718 -1 626923679 -14610 30 1.0 10016.486277900643 -10 626923679 -15887 26 10.0 9104.820520135108 +1 626923679 -14610 30 1.0 10016.486277900645 +10 626923679 -15887 26 10.0 9104.82052013511 11 1072654057 -14696 1035 11.0 9531.018991371746 12 626923679 -14642 18 12.0 9696.038286378725 13 626923679 -14771 26 13.0 8128.265919972384 14 626923679 -13367 28 14.0 9074.674998750581 -15 626923679 -16339 28 15.0 9770.473400901916 -16 626923679 -14001 26 16.0 10130.883606275334 +15 626923679 -16339 28 15.0 9770.473400901918 +16 626923679 -14001 26 16.0 10130.883606275338 17 626923679 -16109 22 16.73235294865627 1353416.3383574807 18 626923679 -15779 21 18.0 10820.004053788869 19 626923679 -16049 21 19.0 9423.560227007669 @@ -332,28 +337,28 @@ POSTHOOK: Input: default@alltypes_parquet 23 626923679 -15514 24 23.0 8542.419116415425 24 626923679 -15086 24 24.0 9661.203790645088 25 626923679 -11349 23 25.0 8888.959012093468 -26 626923679 -14516 29 26.0 9123.125508880432 -27 626923679 -14965 24 27.0 9802.871860196345 +26 626923679 -14516 29 26.0 9123.125508880434 +27 626923679 -14965 24 27.0 9802.871860196343 28 626923679 -14455 20 28.0 9283.289383115296 29 626923679 -15892 16 29.0 9874.046501817154 -3 626923679 -16339 30 3.0 10483.526375885149 -30 626923679 -14111 27 30.0 10066.520234676527 +3 626923679 -16339 30 3.0 10483.526375885147 +30 626923679 -14111 27 30.0 10066.520234676529 31 626923679 -15960 24 31.0 10427.970184550613 32 626923679 -14044 24 32.0 8376.464579403413 -33 626923679 -14642 29 40.61776386607777 
1304429.5939037625 -34 626923679 -15059 28 34.0 8756.731536033676 +33 626923679 -14642 29 40.61776386607777 1304429.593903763 +34 626923679 -15059 28 34.0 8756.731536033674 35 626923679 -16153 27 35.0 10351.008404963042 36 626923679 -15912 20 36.0 9475.257975138164 37 626923679 -12081 24 37.0 9017.860034890362 38 626923679 -15248 29 38.0 9900.256257785535 -39 626923679 -14887 28 39.0 10513.343644635232 -4 626923679 -15999 29 4.0 9516.189702058042 +39 626923679 -14887 28 39.0 10513.343644635233 +4 626923679 -15999 29 4.0 9516.189702058044 40 626923679 -15861 22 40.0 9283.318678549174 -41 626923679 -13480 21 41.0 9016.291129937847 +41 626923679 -13480 21 41.0 9016.291129937848 42 626923679 -15834 28 42.0 10318.01399719996 43 626923679 -15703 28 43.0 8757.796089055722 44 626923679 -11185 16 44.0 9425.076634933797 -45 626923679 -15228 18 45.0 9459.968668643689 +45 626923679 -15228 18 45.0 9459.968668643687 46 626923679 -15187 22 46.0 9685.908173160062 47 626923679 -16324 22 47.0 9822.220821743611 48 626923679 -16372 29 48.0 10079.286173063345 @@ -364,18 +369,18 @@ POSTHOOK: Input: default@alltypes_parquet 52 626923679 -15450 20 52.0 9261.723648435052 53 626923679 -16217 30 53.0 9895.247408969733 54 626923679 -15245 16 54.0 9789.50878424882 -55 626923679 -15887 21 55.0 9826.38569192808 +55 626923679 -15887 21 55.0 9826.385691928082 56 626923679 -12631 21 56.0 8860.917133763547 57 626923679 -15620 25 57.0 9413.99393840875 58 626923679 -13627 20 58.0 9083.529665947459 -59 626923679 -16076 17 59.0 10117.44967077967 -6 626923679 -15948 30 6.0 9644.247255286113 +59 626923679 -16076 17 59.0 10117.449670779672 +6 626923679 -15948 30 6.0 9644.247255286115 60 626923679 -13606 23 60.0 8346.267436552042 -61 626923679 -15894 29 61.0 8785.714950987198 -62 626923679 -14307 17 62.0 9491.752726667326 +61 626923679 -15894 29 61.0 8785.7149509872 +62 626923679 -14307 17 62.0 9491.752726667324 7 626923679 -15839 25 7.0 10077.151640330823 8 1070764888 -15778 1034 8.0 9562.355155774725 -9 626923679 -13629 25 9.0 10157.217948808622 +9 626923679 -13629 25 9.0 10157.21794880862 NULL 1073418988 -16379 3115 NULL 305051.4870777435 PREHOOK: query: explain vectorization select * from alltypes_parquet @@ -499,21 +504,21 @@ STAGE PLANS: alias: alltypes_parquet Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 128 Data size: 7556 Basic stats: 
COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: tinyint)
-                       Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
-                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
+                       Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs (cache only)
            Map Vectorization:
@@ -521,26 +526,31 @@ STAGE PLANS:
                enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false
                inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-               vectorized: false
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+               aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+               Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
@@ -570,36 +580,36 @@ POSTHOOK: query: select ctinyint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypes_parquet
#### A masked pattern was here ####
--1 626923679 -15441 36 -1.0486250072717667 8786.246963933321
+-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323
-10 626923679 -15384 28 -10.0 8850.451610567823
--11 626923679 -15659 32 -11.0 10453.738567408038
--12 626923679 -16373 22 -12.0 10173.15707541171
--13 626923679 -15446 30 -13.0 8907.942987576693
+-11 626923679 -15659 32 -11.0 10453.73856740804
+-12 626923679 -16373 22 -12.0 10173.157075411711
+-13 626923679 -15446 30 -13.0 8907.942987576691
-14 626923679 -13884 22 -14.0 10125.818731386042
--15 626923679 -16036 24 -15.0 9450.506254395024
+-15 626923679 -16036 24 -15.0 9450.506254395026
-16 626923679 -15154 21 -16.0 8884.207393686478
-17 626923679 -15922 19 -17.0 9944.104273894172
-18 626923679 -14863 24 -18.0 9638.430684071413
-19 626923679 -15935 25 -19.0 9967.22240685782
-2 626923679 -16277 20 -2.0 10800.090249507177
--20 626923679 -16126 24 -20.0 9868.92268080106
+-20 626923679 -16126 24 -20.0 9868.922680801063
-21 626923679 -16017 27 -21.0 9480.349236669877
-22 626923679 -14701 22 -22.0 8809.230165774987
-23 626923679 -16355 36 -23.345263230173213 9401.831290253447
-24 626923679 -16311 26 -24.0 9386.736402961187
--25 626923679 -15862 24 -25.0 9778.256724727018
--26 626923679 -15686 15 -26.0 10874.523900405318
+-25 626923679 -15862 24 -25.0 9778.25672472702
+-26 626923679 -15686 15 -26.0 10874.52390040532
-27 626923679 -14984 20 -27.0 8465.29660255097
-28 626923679 -15813 20 -28.0 9616.869413270924
--29 626923679 -14747 26 -29.0 9052.945656011721
+-29 626923679 -14747 26 -29.0 9052.945656011723
-3 626923679 -13632 16 -3.0 8836.215573422822
--30 626923679 -14863 23 -30.0 9193.941914019653
+-30 626923679 -14863 23 -30.0 9193.941914019651
-31 626923679 -15915 22 -31.0 9187.596784112568
--32 626923679 -15866 25 -32.0 9535.546396775915
+-32 626923679 -15866 25 -32.0 9535.546396775917
-33 626923679 -12779 21 -33.0 8854.331159704514
-34 626923679 -15450 29 -34.0 8708.243526705026
-35 626923679 -16059 23 -35.0 10136.580492864763
--36 626923679 -16208 23 -36.0 8773.547684436919
+-36 626923679 -16208 23 -36.0 8773.54768443692
-37 626923679 -14780 17 -37.0 10368.905538788269
-38 626923679 -14914 28 -38.0 8767.375358291503
-39 626923679 -15612 19 -39.0 9765.551806305297
@@ -612,37 +622,37 @@ POSTHOOK: Input: default@alltypes_parquet
-45 626923679 -15027 21 -45.0 8567.489593562543
-46 626923679 -12427 21 -46.0 9182.943188188632
-47 626923679 -16096 19 -47.0 9011.009178780589
--48 626923679 -15462 26 -48.0 9913.883371354861
+-48 626923679 -15462 26 -48.0 9913.883371354863
-49 626923679 -14831 23 -49.0 9894.429191738676
-5 626923679 -15780 24 -5.0 10599.227726422314
-50 626923679 -14320 27 -50.0 8548.827748002343
-51 1073680599 -15734 1028 -51.0 9531.569305177045
-52 626923679 -16369 30 -52.0 8625.06871423408
--53 626923679 -15445 19 -53.0 9387.739325499799
--54 626923679 -14815 23 -54.0 9614.154026896626
+-53 626923679 -15445 19 -53.0 9387.7393254998
+-54 626923679 -14815 23 -54.0 9614.154026896624
-55 626923679 -13381 26 -55.0 9157.562103946742
--56 626923679 -11999 33 -56.0 9490.842152672341
--57 626923679 -14893 32 -57.0 8572.083461570477
--58 626923679 -15169 20 -58.0 9549.096672008198
--59 626923679 -15789 28 -59.0 9829.790704244733
+-56 626923679 -11999 33 -56.0 9490.84215267234
+-57 626923679 -14893 32 -57.0 8572.083461570479
+-58 626923679 -15169 20 -58.0 9549.096672008196
+-59 626923679 -15789 28 -59.0 9829.790704244735
-6 626923679 -15980 30 -6.0 10262.829252317424
-60 626923679 -15792 24 -60.0 9892.656196775464
--61 626923679 -15142 22 -61.0 9357.236187870849
--62 626923679 -15992 24 -62.0 9004.593091474135
+-61 626923679 -15142 22 -61.0 9357.23618787085
+-62 626923679 -15992 24 -62.0 9004.593091474137
-63 626923679 -12516 16 -63.0 9263.605837223322
-64 626923679 -15920 21 -64.0 9254.456539277186
--7 626923679 -14584 23 -7.0 9946.605446407746
--8 626923679 -14678 18 -8.0 9976.831992670684
+-7 626923679 -14584 23 -7.0 9946.605446407748
+-8 626923679 -14678 18 -8.0 9976.831992670686
-9 626923679 -15329 31 -9.0 8999.391457373968
0 626923679 -14254 24 0.0 10057.5018088718
-1 626923679 -14610 30 1.0 10016.486277900643
-10 626923679 -15887 26 10.0 9104.820520135108
+1 626923679 -14610 30 1.0 10016.486277900645
+10 626923679 -15887 26 10.0 9104.82052013511
11 1072654057 -14696 1035 11.0 9531.018991371746
12 626923679 -14642 18 12.0 9696.038286378725
13 626923679 -14771 26 13.0 8128.265919972384
14 626923679 -13367 28 14.0 9074.674998750581
-15 626923679 -16339 28 15.0 9770.473400901916
-16 626923679 -14001 26 16.0 10130.883606275334
+15 626923679 -16339 28 15.0 9770.473400901918
+16 626923679 -14001 26 16.0 10130.883606275338
17 626923679 -16109 22 16.73235294865627 1353416.3383574807
18 626923679 -15779 21 18.0 10820.004053788869
19 626923679 -16049 21 19.0 9423.560227007669
@@ -653,28 +663,28 @@ POSTHOOK: Input: default@alltypes_parquet
23 626923679 -15514 24 23.0 8542.419116415425
24 626923679 -15086 24 24.0 9661.203790645088
25 626923679 -11349 23 25.0 8888.959012093468
-26 626923679 -14516 29 26.0 9123.125508880432
-27 626923679 -14965 24 27.0 9802.871860196345
+26 626923679 -14516 29 26.0 9123.125508880434
+27 626923679 -14965 24 27.0 9802.871860196343
28 626923679 -14455 20 28.0 9283.289383115296
29 626923679 -15892 16 29.0 9874.046501817154
-3 626923679 -16339 30 3.0 10483.526375885149
-30 626923679 -14111 27 30.0 10066.520234676527
+3 626923679 -16339 30 3.0 10483.526375885147
+30 626923679 -14111 27 30.0 10066.520234676529
31 626923679 -15960 24 31.0 10427.970184550613
32 626923679 -14044 24 32.0 8376.464579403413
-33 626923679 -14642 29 40.61776386607777 1304429.5939037625
-34 626923679 -15059 28 34.0 8756.731536033676
+33 626923679 -14642 29 40.61776386607777 1304429.593903763
+34 626923679 -15059 28 34.0 8756.731536033674
35 626923679 -16153 27 35.0 10351.008404963042
36 626923679 -15912 20 36.0 9475.257975138164
37 626923679 -12081 24 37.0 9017.860034890362
38 626923679 -15248 29 38.0 9900.256257785535
-39 626923679 -14887 28 39.0 10513.343644635232
-4 626923679 -15999 29 4.0 9516.189702058042
+39 626923679 -14887 28 39.0 10513.343644635233
+4 626923679 -15999 29 4.0 9516.189702058044
40 626923679 -15861 22 40.0 9283.318678549174
-41 626923679 -13480 21 41.0 9016.291129937847
+41 626923679 -13480 21 41.0 9016.291129937848
42 626923679 -15834 28 42.0 10318.01399719996
43 626923679 -15703 28 43.0 8757.796089055722
44 626923679 -11185 16 44.0 9425.076634933797
-45 626923679 -15228 18 45.0 9459.968668643689
+45 626923679 -15228 18 45.0 9459.968668643687
46 626923679 -15187 22 46.0 9685.908173160062
47 626923679 -16324 22 47.0 9822.220821743611
48 626923679 -16372 29 48.0 10079.286173063345
@@ -685,18 +695,18 @@ POSTHOOK: Input: default@alltypes_parquet
52 626923679 -15450 20 52.0 9261.723648435052
53 626923679 -16217 30 53.0 9895.247408969733
54 626923679 -15245 16 54.0 9789.50878424882
-55 626923679 -15887 21 55.0 9826.38569192808
+55 626923679 -15887 21 55.0 9826.385691928082
56 626923679 -12631 21 56.0 8860.917133763547
57 626923679 -15620 25 57.0 9413.99393840875
58 626923679 -13627 20 58.0 9083.529665947459
-59 626923679 -16076 17 59.0 10117.44967077967
-6 626923679 -15948 30 6.0 9644.247255286113
+59 626923679 -16076 17 59.0 10117.449670779672
+6 626923679 -15948 30 6.0 9644.247255286115
60 626923679 -13606 23 60.0 8346.267436552042
-61 626923679 -15894 29 61.0 8785.714950987198
-62 626923679 -14307 17 62.0 9491.752726667326
+61 626923679 -15894 29 61.0 8785.7149509872
+62 626923679 -14307 17 62.0 9491.752726667324
7 626923679 -15839 25 7.0 10077.151640330823
8 1070764888 -15778 1034 8.0 9562.355155774725
-9 626923679 -13629 25 9.0 10157.217948808622
+9 626923679 -13629 25 9.0 10157.21794880862
NULL 1073418988 -16379 3115 NULL 305051.4870777435
PREHOOK: query: explain vectorization select * from alltypes_parquet
@@ -825,21 +835,21 @@ STAGE PLANS:
                  alias: alltypes_parquet
                  Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
-                   expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
-                   outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
+                   expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                    Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
-                     aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
-                     keys: ctinyint (type: tinyint)
+                     aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
+                     keys: _col0 (type: tinyint)
                      mode: hash
-                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                     Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
+                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                     Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: tinyint)
-                       Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
-                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
+                       Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs (cache only)
            Map Vectorization:
@@ -852,26 +862,31 @@ STAGE PLANS:
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-               vectorized: false
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+               aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+               Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
@@ -901,36 +916,36 @@ POSTHOOK: query: select ctinyint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypes_parquet
#### A masked pattern was here ####
--1 626923679 -15441 36 -1.0486250072717667 8786.246963933321
+-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323
-10 626923679 -15384 28 -10.0 8850.451610567823
--11 626923679 -15659 32 -11.0 10453.738567408038
--12 626923679 -16373 22 -12.0 10173.15707541171
--13 626923679 -15446 30 -13.0 8907.942987576693
+-11 626923679 -15659 32 -11.0 10453.73856740804
+-12 626923679 -16373 22 -12.0 10173.157075411711
+-13 626923679 -15446 30 -13.0 8907.942987576691
-14 626923679 -13884 22 -14.0 10125.818731386042
--15 626923679 -16036 24 -15.0 9450.506254395024
+-15 626923679 -16036 24 -15.0 9450.506254395026
-16 626923679 -15154 21 -16.0 8884.207393686478
-17 626923679 -15922 19 -17.0 9944.104273894172
-18 626923679 -14863 24 -18.0 9638.430684071413
-19 626923679 -15935 25 -19.0 9967.22240685782
-2 626923679 -16277 20 -2.0 10800.090249507177
--20 626923679 -16126 24 -20.0 9868.92268080106
+-20 626923679 -16126 24 -20.0 9868.922680801063
-21 626923679 -16017 27 -21.0 9480.349236669877
-22 626923679 -14701 22 -22.0 8809.230165774987
-23 626923679 -16355 36 -23.345263230173213 9401.831290253447
-24 626923679 -16311 26 -24.0 9386.736402961187
--25 626923679 -15862 24 -25.0 9778.256724727018
--26 626923679 -15686 15 -26.0 10874.523900405318
+-25 626923679 -15862 24 -25.0 9778.25672472702
+-26 626923679 -15686 15 -26.0 10874.52390040532
-27 626923679 -14984 20 -27.0 8465.29660255097
-28 626923679 -15813 20 -28.0 9616.869413270924
--29 626923679 -14747 26 -29.0 9052.945656011721
+-29 626923679 -14747 26 -29.0 9052.945656011723
-3 626923679 -13632 16 -3.0 8836.215573422822
--30 626923679 -14863 23 -30.0 9193.941914019653
+-30 626923679 -14863 23 -30.0 9193.941914019651
-31 626923679 -15915 22 -31.0 9187.596784112568
--32 626923679 -15866 25 -32.0 9535.546396775915
+-32 626923679 -15866 25 -32.0 9535.546396775917
-33 626923679 -12779 21 -33.0 8854.331159704514
-34 626923679 -15450 29 -34.0 8708.243526705026
-35 626923679 -16059 23 -35.0 10136.580492864763
--36 626923679 -16208 23 -36.0 8773.547684436919
+-36 626923679 -16208 23 -36.0 8773.54768443692
-37 626923679 -14780 17 -37.0 10368.905538788269
-38 626923679 -14914 28 -38.0 8767.375358291503
-39 626923679 -15612 19 -39.0 9765.551806305297
@@ -943,37 +958,37 @@ POSTHOOK: Input: default@alltypes_parquet
-45 626923679 -15027 21 -45.0 8567.489593562543
-46 626923679 -12427 21 -46.0 9182.943188188632
-47 626923679 -16096 19 -47.0 9011.009178780589
--48 626923679 -15462 26 -48.0 9913.883371354861
+-48 626923679 -15462 26 -48.0 9913.883371354863
-49 626923679 -14831 23 -49.0 9894.429191738676
-5 626923679 -15780 24 -5.0 10599.227726422314
-50 626923679 -14320 27 -50.0 8548.827748002343
-51 1073680599 -15734 1028 -51.0 9531.569305177045
-52 626923679 -16369 30 -52.0 8625.06871423408
--53 626923679 -15445 19 -53.0 9387.739325499799
--54 626923679 -14815 23 -54.0 9614.154026896626
+-53 626923679 -15445 19 -53.0 9387.7393254998
+-54 626923679 -14815 23 -54.0 9614.154026896624
-55 626923679 -13381 26 -55.0 9157.562103946742
--56 626923679 -11999 33 -56.0 9490.842152672341
--57 626923679 -14893 32 -57.0 8572.083461570477
--58 626923679 -15169 20 -58.0 9549.096672008198
--59 626923679 -15789 28 -59.0 9829.790704244733
+-56 626923679 -11999 33 -56.0 9490.84215267234
+-57 626923679 -14893 32 -57.0 8572.083461570479
+-58 626923679 -15169 20 -58.0 9549.096672008196
+-59 626923679 -15789 28 -59.0 9829.790704244735
-6 626923679 -15980 30 -6.0 10262.829252317424
-60 626923679 -15792 24 -60.0 9892.656196775464
--61 626923679 -15142 22 -61.0 9357.236187870849
--62 626923679 -15992 24 -62.0 9004.593091474135
+-61 626923679 -15142 22 -61.0 9357.23618787085
+-62 626923679 -15992 24 -62.0 9004.593091474137
-63 626923679 -12516 16 -63.0 9263.605837223322
-64 626923679 -15920 21 -64.0 9254.456539277186
--7 626923679 -14584 23 -7.0 9946.605446407746
--8 626923679 -14678 18 -8.0 9976.831992670684
+-7 626923679 -14584 23 -7.0 9946.605446407748
+-8 626923679 -14678 18 -8.0 9976.831992670686
-9 626923679 -15329 31 -9.0 8999.391457373968
0 626923679 -14254 24 0.0 10057.5018088718
-1 626923679 -14610 30 1.0 10016.486277900643
-10 626923679 -15887 26 10.0 9104.820520135108
+1 626923679 -14610 30 1.0 10016.486277900645
+10 626923679 -15887 26 10.0 9104.82052013511
11 1072654057 -14696 1035 11.0 9531.018991371746
12 626923679 -14642 18 12.0 9696.038286378725
13 626923679 -14771 26 13.0 8128.265919972384
14 626923679 -13367 28 14.0 9074.674998750581
-15 626923679 -16339 28 15.0 9770.473400901916
-16 626923679 -14001 26 16.0 10130.883606275334
+15 626923679 -16339 28 15.0 9770.473400901918
+16 626923679 -14001 26 16.0 10130.883606275338
17 626923679 -16109 22 16.73235294865627 1353416.3383574807
18 626923679 -15779 21 18.0 10820.004053788869
19 626923679 -16049 21 19.0 9423.560227007669
@@ -984,28 +999,28 @@ POSTHOOK: Input: default@alltypes_parquet
23 626923679 -15514 24 23.0 8542.419116415425
24 626923679 -15086 24 24.0 9661.203790645088
25 626923679 -11349 23 25.0 8888.959012093468
-26 626923679 -14516 29 26.0 9123.125508880432
-27 626923679 -14965 24 27.0 9802.871860196345
+26 626923679 -14516 29 26.0 9123.125508880434
+27 626923679 -14965 24 27.0 9802.871860196343
28 626923679 -14455 20 28.0 9283.289383115296
29 626923679 -15892 16 29.0 9874.046501817154
-3 626923679 -16339 30 3.0 10483.526375885149
-30 626923679 -14111 27 30.0 10066.520234676527
+3 626923679 -16339 30 3.0 10483.526375885147
+30 626923679 -14111 27 30.0 10066.520234676529
31 626923679 -15960 24 31.0 10427.970184550613
32 626923679 -14044 24 32.0 8376.464579403413
-33 626923679 -14642 29 40.61776386607777 1304429.5939037625
-34 626923679 -15059 28 34.0 8756.731536033676
+33 626923679 -14642 29 40.61776386607777 1304429.593903763
+34 626923679 -15059 28 34.0 8756.731536033674
35 626923679 -16153 27 35.0 10351.008404963042
36 626923679 -15912 20 36.0 9475.257975138164
37 626923679 -12081 24 37.0 9017.860034890362
38 626923679 -15248 29 38.0 9900.256257785535
-39 626923679 -14887 28 39.0 10513.343644635232
-4 626923679 -15999 29 4.0 9516.189702058042
+39 626923679 -14887 28 39.0 10513.343644635233
+4 626923679 -15999 29 4.0 9516.189702058044
40 626923679 -15861 22 40.0 9283.318678549174
-41 626923679 -13480 21 41.0 9016.291129937847
+41 626923679 -13480 21 41.0 9016.291129937848
42 626923679 -15834 28 42.0 10318.01399719996
43 626923679 -15703 28 43.0 8757.796089055722
44 626923679 -11185 16 44.0 9425.076634933797
-45 626923679 -15228 18 45.0 9459.968668643689
+45 626923679 -15228 18 45.0 9459.968668643687
46 626923679 -15187 22 46.0 9685.908173160062
47 626923679 -16324 22 47.0 9822.220821743611
48 626923679 -16372 29 48.0 10079.286173063345
@@ -1016,18 +1031,18 @@ POSTHOOK: Input: default@alltypes_parquet
52 626923679 -15450 20 52.0 9261.723648435052
53 626923679 -16217 30 53.0 9895.247408969733
54 626923679 -15245 16 54.0 9789.50878424882
-55 626923679 -15887 21 55.0 9826.38569192808
+55 626923679 -15887 21 55.0 9826.385691928082
56 626923679 -12631 21 56.0 8860.917133763547
57 626923679 -15620 25 57.0 9413.99393840875
58 626923679 -13627 20 58.0 9083.529665947459
-59 626923679 -16076 17 59.0 10117.44967077967
-6 626923679 -15948 30 6.0 9644.247255286113
+59 626923679 -16076 17 59.0 10117.449670779672
+6 626923679 -15948 30 6.0 9644.247255286115
60 626923679 -13606 23 60.0 8346.267436552042
-61 626923679 -15894 29 61.0 8785.714950987198
-62 626923679 -14307 17 62.0 9491.752726667326
+61 626923679 -15894 29 61.0 8785.7149509872
+62 626923679 -14307 17 62.0 9491.752726667324
7 626923679 -15839 25 7.0 10077.151640330823
8 1070764888 -15778 1034 8.0 9562.355155774725
-9 626923679 -13629 25 9.0 10157.217948808622
+9 626923679 -13629 25 9.0 10157.21794880862
NULL 1073418988 -16379 3115 NULL 305051.4870777435
PREHOOK: query: create table if not exists alltypes_orc (
  cint int,
@@ -1199,21 +1214,21 @@ STAGE PLANS:
                  alias: alltypes_orc
                  Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
-                   expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
-                   outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
+                   expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                    Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
-                     aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
-                     keys: ctinyint (type: tinyint)
+                     aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
+                     keys: _col0 (type: tinyint)
                      mode: hash
-                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                     Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
+                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                     Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: tinyint)
-                       Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE
-                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
+                       Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -1221,26 +1236,31 @@ STAGE PLANS:
                enabledConditionsNotMet: hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.orc.OrcInputFormat IS false
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-               vectorized: false
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+               aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
                keys: KEY._col0 (type: tinyint)
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-               Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+               Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                  Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
@@ -1270,36 +1290,36 @@ POSTHOOK: query: select ctinyint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypes_orc
#### A masked pattern was here ####
--1 626923679 -15441 36 -1.0486250072717667 8786.246963933321
+-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323
-10 626923679 -15384 28 -10.0 8850.451610567823
--11 626923679 -15659 32 -11.0 10453.738567408038
--12 626923679 -16373 22 -12.0 10173.15707541171
--13 626923679 -15446 30 -13.0 8907.942987576693
+-11 626923679 -15659 32 -11.0 10453.73856740804
+-12 626923679 -16373 22 -12.0 10173.157075411711
+-13 626923679 -15446 30 -13.0 8907.942987576691
-14 626923679 -13884 22 -14.0 10125.818731386042
--15 626923679 -16036 24 -15.0 9450.506254395024
+-15 626923679 -16036 24 -15.0 9450.506254395026
-16 626923679 -15154 21 -16.0 8884.207393686478
-17 626923679 -15922 19 -17.0 9944.104273894172
-18 626923679 -14863 24 -18.0 9638.430684071413
-19 626923679 -15935 25 -19.0 9967.22240685782
-2 626923679 -16277 20 -2.0 10800.090249507177
--20 626923679 -16126 24 -20.0 9868.92268080106
+-20 626923679 -16126 24 -20.0 9868.922680801063
-21 626923679 -16017 27 -21.0 9480.349236669877
-22 626923679 -14701 22 -22.0 8809.230165774987
-23 626923679 -16355 36 -23.345263230173213 9401.831290253447
-24 626923679 -16311 26 -24.0 9386.736402961187
--25 626923679 -15862 24 -25.0 9778.256724727018
--26 626923679 -15686 15 -26.0 10874.523900405318
+-25 626923679 -15862 24 -25.0 9778.25672472702
+-26 626923679 -15686 15 -26.0 10874.52390040532
-27 626923679 -14984 20 -27.0 8465.29660255097
-28 626923679 -15813 20 -28.0 9616.869413270924
--29 626923679 -14747 26 -29.0 9052.945656011721
+-29 626923679 -14747 26 -29.0 9052.945656011723
-3 626923679 -13632 16 -3.0 8836.215573422822
--30 626923679 -14863 23 -30.0 9193.941914019653
+-30 626923679 -14863 23 -30.0 9193.941914019651
-31 626923679 -15915 22 -31.0 9187.596784112568
--32 626923679 -15866 25 -32.0 9535.546396775915
+-32 626923679 -15866 25 -32.0 9535.546396775917
-33 626923679 -12779 21 -33.0 8854.331159704514
-34 626923679 -15450 29 -34.0 8708.243526705026
-35 626923679 -16059 23 -35.0 10136.580492864763
--36 626923679 -16208 23 -36.0 8773.547684436919
+-36 626923679 -16208 23 -36.0 8773.54768443692
-37 626923679 -14780 17 -37.0 10368.905538788269
-38 626923679 -14914 28 -38.0 8767.375358291503
-39 626923679 -15612 19 -39.0 9765.551806305297
@@ -1312,37 +1332,37 @@ POSTHOOK: Input: default@alltypes_orc
-45 626923679 -15027 21 -45.0 8567.489593562543
-46 626923679 -12427 21 -46.0 9182.943188188632
-47 626923679 -16096 19 -47.0 9011.009178780589
--48 626923679 -15462 26 -48.0 9913.883371354861
+-48 626923679 -15462 26 -48.0 9913.883371354863
-49 626923679 -14831 23 -49.0 9894.429191738676
-5 626923679 -15780 24 -5.0 10599.227726422314
-50 626923679 -14320 27 -50.0 8548.827748002343
-51 1073680599 -15734 1028 -51.0 9531.569305177045
-52 626923679 -16369 30 -52.0 8625.06871423408
--53 626923679 -15445 19 -53.0 9387.739325499799
--54 626923679 -14815 23 -54.0 9614.154026896626
+-53 626923679 -15445 19 -53.0 9387.7393254998
+-54 626923679 -14815 23 -54.0 9614.154026896624
-55 626923679 -13381 26 -55.0 9157.562103946742
--56 626923679 -11999 33 -56.0 9490.842152672341
--57 626923679 -14893 32 -57.0 8572.083461570477
--58 626923679 -15169 20 -58.0 9549.096672008198
--59 626923679 -15789 28 -59.0 9829.790704244733
+-56 626923679 -11999 33 -56.0 9490.84215267234
+-57 626923679 -14893 32 -57.0 8572.083461570479
+-58 626923679 -15169 20 -58.0 9549.096672008196
+-59 626923679 -15789 28 -59.0 9829.790704244735
-6 626923679 -15980 30 -6.0 10262.829252317424
-60 626923679 -15792 24 -60.0 9892.656196775464
--61 626923679 -15142 22 -61.0 9357.236187870849
--62 626923679 -15992 24 -62.0 9004.593091474135
+-61 626923679 -15142 22 -61.0 9357.23618787085
+-62 626923679 -15992 24 -62.0 9004.593091474137
-63 626923679 -12516 16 -63.0 9263.605837223322
-64 626923679 -15920 21 -64.0 9254.456539277186
--7 626923679 -14584 23 -7.0 9946.605446407746
--8 626923679 -14678 18 -8.0 9976.831992670684
+-7 626923679 -14584 23 -7.0 9946.605446407748
+-8 626923679 -14678 18 -8.0 9976.831992670686
-9 626923679 -15329 31 -9.0 8999.391457373968
0 626923679 -14254 24 0.0 10057.5018088718
-1 626923679 -14610 30 1.0 10016.486277900643
-10 626923679 -15887 26 10.0 9104.820520135108
+1 626923679 -14610 30 1.0 10016.486277900645
+10 626923679 -15887 26 10.0 9104.82052013511
11 1072654057 -14696 1035 11.0 9531.018991371746
12 626923679 -14642 18 12.0 9696.038286378725
13 626923679 -14771 26 13.0 8128.265919972384
14 626923679 -13367 28 14.0 9074.674998750581
-15 626923679 -16339 28 15.0 9770.473400901916
-16 626923679 -14001 26 16.0 10130.883606275334
+15 626923679 -16339 28 15.0 9770.473400901918
+16 626923679 -14001 26 16.0 10130.883606275338
17 626923679 -16109 22 16.73235294865627 1353416.3383574807
18 626923679 -15779 21 18.0 10820.004053788869
19 626923679 -16049 21 19.0 9423.560227007669
@@ -1353,28 +1373,28 @@ POSTHOOK: Input: default@alltypes_orc
23 626923679 -15514 24 23.0 8542.419116415425
24 626923679 -15086 24 24.0 9661.203790645088
25 626923679 -11349 23 25.0 8888.959012093468
-26 626923679 -14516 29 26.0 9123.125508880432
-27 626923679 -14965 24 27.0 9802.871860196345
+26 626923679 -14516 29 26.0 9123.125508880434
+27 626923679 -14965 24 27.0 9802.871860196343
28 626923679 -14455 20 28.0 9283.289383115296
29 626923679 -15892 16 29.0 9874.046501817154
-3 626923679 -16339 30 3.0 10483.526375885149
-30 626923679 -14111 27 30.0 10066.520234676527
+3 626923679 -16339 30 3.0 10483.526375885147
+30 626923679 -14111 27 30.0 10066.520234676529
31 626923679 -15960 24 31.0 10427.970184550613
32 626923679 -14044 24 32.0 8376.464579403413
-33 626923679 -14642 29 40.61776386607777 1304429.5939037625
-34 626923679 -15059 28 34.0 8756.731536033676
+33 626923679 -14642 29 40.61776386607777 1304429.593903763
+34 626923679 -15059 28 34.0 8756.731536033674
35 626923679 -16153 27 35.0 10351.008404963042
36 626923679 -15912 20 36.0 9475.257975138164
37 626923679 -12081 24 37.0 9017.860034890362
38 626923679 -15248 29 38.0 9900.256257785535
-39 626923679 -14887 28 39.0 10513.343644635232
-4 626923679 -15999 29 4.0 9516.189702058042
+39 626923679 -14887 28 39.0 10513.343644635233
+4 626923679 -15999 29 4.0 9516.189702058044
40 626923679 -15861 22 40.0 9283.318678549174
-41 626923679 -13480 21 41.0 9016.291129937847
+41 626923679 -13480 21 41.0 9016.291129937848
42 626923679 -15834 28 42.0 10318.01399719996
43 626923679 -15703 28 43.0 8757.796089055722
44 626923679 -11185 16 44.0 9425.076634933797
-45 626923679 -15228 18 45.0 9459.968668643689
+45 626923679 -15228 18 45.0 9459.968668643687
46 626923679 -15187 22 46.0 9685.908173160062
47 626923679 -16324 22 47.0 9822.220821743611
48 626923679 -16372 29 48.0 10079.286173063345
@@ -1385,18 +1405,18 @@ POSTHOOK: Input: default@alltypes_orc
52 626923679 -15450 20 52.0 9261.723648435052
53 626923679 -16217 30 53.0 9895.247408969733
54 626923679 -15245 16 54.0 9789.50878424882
-55 626923679 -15887 21 55.0 9826.38569192808
+55 626923679 -15887 21 55.0 9826.385691928082
56 626923679 -12631 21 56.0 8860.917133763547
57 626923679 -15620 25 57.0 9413.99393840875
58 626923679 -13627 20 58.0 9083.529665947459
-59 626923679 -16076 17 59.0 10117.44967077967
-6 626923679 -15948 30 6.0 9644.247255286113
+59 626923679 -16076 17 59.0 10117.449670779672
+6 626923679 -15948 30 6.0 9644.247255286115
60 626923679 -13606 23 60.0 8346.267436552042
-61 626923679 -15894 29 61.0 8785.714950987198
-62 626923679 -14307 17 62.0 9491.752726667326
+61 626923679 -15894 29 61.0 8785.7149509872
+62 626923679 -14307 17 62.0 9491.752726667324
7 626923679 -15839 25 7.0 10077.151640330823
8 1070764888 -15778 1034 8.0 9562.355155774725
-9 626923679 -13629 25 9.0 10157.217948808622
+9 626923679 -13629 25 9.0 10157.21794880862
NULL 1073418988 -16379 3115 NULL 305051.4870777435
PREHOOK: query: create table orcTbl (t1 tinyint, t2 tinyint) stored as orc
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_not.q.out b/ql/src/test/results/clientpositive/llap/vectorization_not.q.out
index b5587ba4d0..cf92a6f694 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_not.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_not.q.out
@@ -55,4 +55,4 @@ WHERE (((cstring2 LIKE '%b%')
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
--3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593627E9 3.875716535945533E8 0.0 2.0634715172019392E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0516820315185745E9 -2.0634715172019392E18 1.5020929380914048E17 -64 64
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
index ca2aa87176..43f999e677 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_pushdown.q.out
@@ -33,14 +33,14 @@ STAGE PLANS:
                    outputColumnNames: cbigint
                    Statistics: Num rows: 4096 Data size: 48944 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
-                     aggregations: avg(cbigint)
+                     aggregations: sum(cbigint), count(cbigint)
                      mode: hash
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order:
-                       Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
-                       value expressions: _col0 (type: struct)
+                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col0 (type: bigint), _col1 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -62,17 +62,21 @@ STAGE PLANS:
                vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0)
+               aggregations: sum(VALUE._col0), count(VALUE._col1)
                mode: mergepartial
-               outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-               File Output Operator
-                 compressed: false
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: (_col0 / _col1) (type: double)
+                 outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                 table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 1023143f97..c93bf364a2 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -103,33 +103,34 @@ STAGE PLANS:
                    predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean)
                    Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                     expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
-                     outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble
+                     expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double)
+                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0, 1, 2, 4, 5]
+                         projectedOutputColumnNums: [2, 5, 1, 4, 0, 13, 18, 16, 20]
+                         selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 17:double) -> 18:double, CastLongToDouble(col 1:smallint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 17:double, CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double
                      Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
-                       aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
+                       aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4)
                        Group By Vectorization:
-                           aggregators: VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> bigint
+                           aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint
                            className: VectorGroupByOperator
                            groupByMode: HASH
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                           projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                        mode: hash
-                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                       Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+                       Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order:
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkEmptyKeyOperator
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
-                         value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint)
+                         Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
+                         value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: bigint), _col10 (type: tinyint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -142,24 +143,40 @@ STAGE PLANS:
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-               vectorized: false
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8)
+               aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), min(VALUE._col10)
+               Group By Vectorization:
+                   aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFMinLong(col 10:tinyint) -> tinyint
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-               Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
+               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                 expressions: _col0 (type: double), (_col0 + -3728.0D) (type: double), (- (_col0 + -3728.0D)) (type: double), (- (- (_col0 + -3728.0D))) (type: double), ((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) * (- (- (_col0 + -3728.0D)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0D)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0D)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175D - _col4) (type: double), (- (10.175D - _col4)) (type: double), ((- _col2) / -563.0D) (type: double), _col6 (type: double), (- ((- _col2) / -563.0D)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0D)) (type: double), (- (_col0 / _col1)) (type: double)
+                 expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) + -3728.0D) (type: double), (- ((_col0 / _col1) + -3728.0D)) (type: double), (- (- ((_col0 / _col1) + -3728.0D))) (type: double), ((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) (type: double), _col2 (type: double), (- (_col0 / _col1)) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) (type: double), (((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) * (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), (power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), ((power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) * power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), (_col8 / _col9) (type: double), (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), (- (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END))) (type: double), ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), (- ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), ((_col0 / _col1) / _col2) (type: double), _col10 (type: tinyint), _col7 (type: bigint), (UDFToDouble(_col10) / ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), (- ((_col0 / _col1) / _col2)) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumnNums: [11, 13, 12, 15, 14, 2, 17, 16, 19, 18, 24, 25, 27, 26, 20, 30, 34, 31, 37, 41, 42, 10, 7, 44, 38]
+                     selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double, DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 15:double) -> 14:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 16:double) -> 14:double) -> 16:double, DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 17:double) -> 14:double, DoubleColUnaryMinus(col 16:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 16:double) -> 17:double, FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 18:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColAddDoubleScalar(col 18:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 19:double) -> 18:double) -> 19:double, DoubleColAddDoubleScalar(col 18:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 20:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleColUnaryMinus(col 20:double)(children: DoubleColAddDoubleScalar(col 19:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 19:double) -> 20:double) -> 19:double) -> 20:double) -> 19:double, FuncPowerDoubleToDouble(col 20:double)(children: DoubleColDivideLongColumn(col 18:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 20:double)(children: DoubleColDivideLongColumn(col 18:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 20:double) -> 18:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 20:double) -> 18:double, DoubleColUnaryMinus(col 20:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 24:double) -> 20:double) -> 24:double) -> 20:double) -> 24:double, DoubleColSubtractDoubleColumn(col 20:double, col 26:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColAddDoubleScalar(col 25:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 25:double) -> 26:double) -> 25:double) -> 26:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 26:double, col 20:double)(children: DoubleColSubtractDoubleColumn(col 20:double, col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 26:double) -> 20:double) -> 26:double) -> 20:double, DoubleColUnaryMinus(col 26:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColAddDoubleScalar(col 26:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 26:double) -> 27:double) -> 26:double) -> 27:double) -> 26:double, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 27:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 27:double) -> 20:double) -> 27:double) -> 20:double) -> 27:double, DoubleColDivideLongColumn(col 20:double, col 29:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 26:double) -> 20:double, IfExprNullCondExpr(col 23:boolean, null, col 28:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 28:bigint) -> 29:bigint) -> 26:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 20:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 29:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 29:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 34:double)(children: DoubleColDivideLongColumn(col 31:double, col 36:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 34:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 31:double) -> 34:double) -> 31:double, IfExprNullCondExpr(col 33:boolean, null, col 35:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 35:bigint) -> 36:bigint) -> 34:double) -> 31:double) -> 34:double, DoubleColDivideDoubleScalar(col 37:double, val -563.0)(children: DoubleColUnaryMinus(col 31:double)(children: FuncPowerDoubleToDouble(col 37:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 37:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 31:double) -> 37:double) -> 31:double) -> 37:double) -> 31:double) -> 37:double) -> 31:double, FuncPowerDoubleToDouble(col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 40:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 37:double) -> 38:double) -> 37:double, IfExprNullCondExpr(col 36:boolean, null, col 39:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 36:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 39:bigint) -> 40:bigint) -> 38:double) -> 37:double, DoubleColUnaryMinus(col 38:double)(children: DoubleColDivideDoubleScalar(col 41:double, val -563.0)(children: DoubleColUnaryMinus(col 38:double)(children: FuncPowerDoubleToDouble(col 41:double)(children: DoubleColDivideLongColumn(col 38:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 41:double)(children: DoubleColDivideLongColumn(col 38:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double, DoubleColDivideDoubleColumn(col 38:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 38:double) -> 42:double, DoubleColDivideDoubleColumn(col 38:double, col 43:double)(children: CastLongToDouble(col 10:tinyint) -> 38:double, DoubleColDivideDoubleScalar(col 44:double, val -563.0)(children: DoubleColUnaryMinus(col 43:double)(children: FuncPowerDoubleToDouble(col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double, DoubleColUnaryMinus(col 43:double)(children: DoubleColDivideDoubleColumn(col 38:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 38:double) -> 43:double) -> 38:double
                  Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
+                   File Sink Vectorization:
+                       className: VectorFileSinkOperator
+                       native: false
                    Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
@@ -248,7 +265,7 @@ WHERE ((762 = cbigint)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@alltypesorc
#### A masked pattern was here ####
-1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900256E8 4.095728233294762E24 8549.657499338187 -5.481251832900256E8 3.8812872199726474E8 2.12743126884874112E17 3.0054786945575034E17 -5.700752675298234 -3.0054786945575034E17 3.0054786945575034E17 973579.3664121237 5.48222463472403E8 -973579.3664121237 -18.377427808018613 -64 2044 -6.573680812059066E-5 18.377427808018613
+1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900251E8 4.095728233294762E24 8549.657499338193 -5.481251832900251E8 3.8812872199726427E8 2.12743126884873664E17 3.0054786945574982E17 -5.700752675298234 -3.0054786945574982E17 3.0054786945574982E17 973579.3664121227 5.482224634724026E8 -973579.3664121227 -18.377427808018613 -64 2044 -6.573680812059072E-5 18.377427808018613
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION
SELECT MAX(cint),
       (MAX(cint) / -3728),
@@ -348,33 +365,34 @@ STAGE PLANS:
                    predicate: (((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cbigint <= 197L) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28D) and (UDFToDouble(csmallint) > cdouble)) or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
                    Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                     expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double)
-                     outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble
+                     expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
+                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                      Select Vectorization:
                          className: VectorSelectOperator
                          native: true
-                         projectedOutputColumnNums: [0, 1, 2, 3, 5]
+                         projectedOutputColumnNums: [2, 3, 1, 5, 0, 13, 16, 14, 18, 15, 20]
+                         selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 15:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 2:int) -> 17:double, CastLongToDouble(col 2:int) -> 19:double) -> 20:double
                      Statistics: Num rows: 6826 Data size: 1131534 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
-                       aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
+                       aggregations: max(_col0), sum(_col6), sum(_col5), count(_col1), sum(_col8), sum(_col7), count(_col2), max(_col3), sum(_col4), count(_col4), min(_col0), min(_col3), sum(_col10), sum(_col9), count(_col0)
                        Group By Vectorization:
-                           aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFAvgLong(col 0:tinyint) -> struct, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp
+                           aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumLong(col 0:tinyint) -> bigint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint
                            className: VectorGroupByOperator
                            groupByMode: HASH
                            native: false
                            vectorProcessingMode: HASH
-                           projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                           projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
                        mode: hash
-                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                       Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE
+                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order:
                          Reduce Sink Vectorization:
                              className: VectorReduceSinkEmptyKeyOperator
                              native: true
                              nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: COMPLETE
-                         value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct)
+                         Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
+                         value expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: int), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -387,24 +405,40 @@ STAGE PLANS:
                usesVectorUDFAdaptor: false
                vectorized: true
        Reducer 2
-           Execution mode: llap
+           Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
-               notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator
-               vectorized: false
+               allNative: false
+               usesVectorUDFAdaptor: false
+               vectorized: true
            Reduce Operator Tree:
              Group By Operator
-               aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8)
+               aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), min(VALUE._col10), min(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), count(VALUE._col14)
+               Group By Vectorization:
+                   aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFMaxDouble(col 7:double) -> double, VectorUDAFSumLong(col 8:bigint) -> bigint, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFMinLong(col 10:int) -> int, VectorUDAFMinDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCountMerge(col 14:bigint) -> bigint
+                   className: VectorGroupByOperator
+                   groupByMode: MERGEPARTIAL
+                   native: false
+                   vectorProcessingMode: GLOBAL
+                   projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
                mode: mergepartial
-               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-               Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
-                 expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0D) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175D) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175D)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28D) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0D)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0D) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double)
+                 expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0D) (type: double), (_col0 * -3728) (type: int), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (_col0 * -3728)) (type: int), power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5) (type: double), (-563 % (_col0 * -3728)) (type: int), (((_col1 - ((_col2 * _col2) / _col3)) / _col3) / power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5)) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5)) (type: double), _col7 (type: double), (_col8 / _col9) (type: double), (power(((_col4
- ((_col5 * _col5) / _col6)) / _col6), 0.5) - 10.175D) (type: double), _col10 (type: int), (UDFToDouble((_col0 * -3728)) % (power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5) - 10.175D)) (type: double), (- _col7) (type: double), _col11 (type: double), (_col7 % -26.28D) (type: double), power(((_col4 - ((_col5 * _col5) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END), 0.5) (type: double), (- (UDFToDouble(_col0) / -3728.0D)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0D) - (_col8 / _col9)) (type: double), (- (_col0 * -3728)) (type: int), ((_col12 - ((_col13 * _col13) / _col14)) / CASE WHEN ((_col14 = 1L)) THEN (null) ELSE ((_col14 - 1)) END) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 16, 17, 18, 20, 15, 22, 24, 23, 7, 21, 26, 10, 27, 25, 11, 28, 29, 30, 32, 37, 35, 36] + selectExpressions: DoubleColDivideDoubleScalar(col 15:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 15:double) -> 16:double, LongColMultiplyLongScalar(col 0:int, val -3728) -> 17:int, DoubleColDivideLongColumn(col 15:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 18:double)(children: DoubleColDivideLongColumn(col 15:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 15:double) -> 18:double) -> 15:double) -> 18:double, LongColUnaryMinus(col 19:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 19:int) -> 20:int, FuncPowerDoubleToDouble(col 21:double)(children: DoubleColDivideLongColumn(col 15:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 21:double)(children: DoubleColDivideLongColumn(col 15:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double) -> 21:double) -> 15:double) -> 21:double) -> 15:double, LongScalarModuloLongColumn(val -563, col 19:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 19:int) -> 22:int, DoubleColDivideDoubleColumn(col 23:double, col 21:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double, FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 24:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 21:double) -> 24:double) -> 21:double) -> 24:double) -> 21:double) -> 24:double, DoubleColUnaryMinus(col 21:double)(children: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double, LongColDivideLongColumn(col 8:bigint, col 9:bigint) -> 21:double, 
DoubleColSubtractDoubleScalar(col 25:double, val 10.175)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 25:double) -> 26:double) -> 25:double) -> 26:double) -> 25:double) -> 26:double, DoubleColModuloDoubleColumn(col 25:double, col 28:double)(children: CastLongToDouble(col 19:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 19:int) -> 25:double, DoubleColSubtractDoubleScalar(col 27:double, val 10.175)(children: FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double, DoubleColUnaryMinus(col 7:double) -> 25:double, DoubleColModuloDoubleScalar(col 7:double, val -26.28) -> 28:double, FuncPowerDoubleToDouble(col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 29:double, IfExprNullCondExpr(col 19:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 33:double)(children: DoubleColDivideDoubleScalar(col 30:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 30:double) -> 33:double) -> 30:double, LongColModuloLongColumn(col 34:int, col 35:int)(children: LongColUnaryMinus(col 32:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 32:int) -> 34:int, LongScalarModuloLongColumn(val -563, col 32:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 32:int) -> 35:int) -> 32:int, DoubleColSubtractDoubleColumn(col 36:double, col 33:double)(children: DoubleColDivideDoubleScalar(col 33:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 33:double) -> 36:double, LongColDivideLongColumn(col 8:bigint, col 9:bigint) -> 33:double) -> 37:double, LongColUnaryMinus(col 34:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 34:int) -> 35:int, DoubleColDivideLongColumn(col 33:double, col 39:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 36:double)(children: DoubleColDivideLongColumn(col 33:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 33:double) -> 36:double) -> 33:double, IfExprNullCondExpr(col 34:boolean, null, col 38:bigint)(children: LongColEqualLongScalar(col 14:bigint, val 1) -> 34:boolean, LongColSubtractLongScalar(col 14:bigint, val 1) -> 38:bigint) -> 39:bigint) -> 36:double Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ 
-487,7 +521,7 @@ WHERE (((cbigint <= 197) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16 +-20301111 5445.576984978541 -1626869520 7.9684972882908896E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 -58 5454.512308361625 1626869520 7.2647256545687872E16 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), @@ -585,33 +619,34 @@ STAGE PLANS: predicate: (((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a')) or (cfloat = 762) or (cstring1 = 'ss') or (ctimestamp1 = ctimestamp2)) (type: boolean) Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble + expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] + projectedOutputColumnNums: [3, 0, 1, 2, 5, 13, 16, 14, 18, 15] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 15:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) + aggregations: sum(_col6), sum(_col5), count(_col0), count(), max(_col1), sum(_col8), sum(_col7), count(_col2), max(_col3), sum(_col9), sum(_col4), count(_col4), count(_col1), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_pop, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFAvgLong(col 0:tinyint) -> struct + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0:tinyint) 
-> tinyint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumLong(col 0:tinyint) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: int), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -624,24 +659,40 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3), max(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), count(VALUE._col12), sum(VALUE._col13) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMaxLong(col 4:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, 
VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxLong(col 8:int) -> int, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFSumLong(col 13:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0D % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762L * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762L * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728L % (UDFToLong(_col2) + (762L * (- _col1)))) (type: bigint) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2)) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) - (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), _col3 (type: bigint), (CAST( _col3 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col4 (type: tinyint), (UDFToDouble(_col3) - (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), (- (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), (-1.0D % (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), _col3 (type: bigint), (- _col3) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col7)) / _col7), 0.5) (type: double), (- (- (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2)))) (type: double), (762L * (- _col3)) (type: bigint), _col8 (type: int), (UDFToLong(_col4) + (762L * (- _col3))) (type: bigint), ((- ((_col0 - ((_col1 * _col1) / _col2)) / _col2)) + UDFToDouble(_col8)) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END), 0.5) (type: double), ((- _col3) % _col3) (type: bigint), _col12 (type: bigint), (_col13 / _col12) (type: double), (-3728L % (UDFToLong(_col4) + (762L * (- _col3)))) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [15, 14, 18, 3, 20, 4, 21, 17, 22, 3, 23, 16, 24, 27, 8, 26, 30, 25, 33, 12, 29, 34] + selectExpressions: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, 
col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 15:double) -> 14:double) -> 15:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 16:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 16:double) -> 14:double) -> 16:double) -> 14:double, DoubleColSubtractDoubleColumn(col 17:double, col 16:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 17:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double, DecimalColModuloDecimalScalar(col 19:decimal(19,0), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 19:decimal(19,0)) -> 20:decimal(5,3), DoubleColSubtractDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 17:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 21:double)(children: DoubleColDivideLongColumn(col 17:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 17:double) -> 21:double) -> 17:double) -> 21:double) -> 17:double) -> 21:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 17:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 17:double) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleScalarModuloDoubleColumn(val -1.0, col 16:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 22:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 22:double) -> 16:double) -> 22:double) -> 16:double) -> 22:double, LongColUnaryMinus(col 3:bigint) -> 23:bigint, FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 16:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 24:double)(children: DoubleColDivideLongColumn(col 16:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 16:double) -> 24:double) -> 16:double) -> 24:double) -> 16:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 24:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 
2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, LongScalarMultiplyLongColumn(val 762, col 26:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 26:bigint) -> 27:bigint, LongColAddLongColumn(col 4:bigint, col 28:bigint)(children: col 4:tinyint, LongScalarMultiplyLongColumn(val 762, col 26:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 26:bigint) -> 28:bigint) -> 26:bigint, DoubleColAddDoubleColumn(col 25:double, col 29:double)(children: DoubleColUnaryMinus(col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 25:double) -> 29:double) -> 25:double) -> 29:double) -> 25:double, CastLongToDouble(col 8:int) -> 29:double) -> 30:double, FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 25:double) -> 29:double) -> 25:double, IfExprNullCondExpr(col 28:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 29:double) -> 25:double, LongColModuloLongColumn(col 32:bigint, col 3:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 32:bigint) -> 33:bigint, LongColDivideLongColumn(col 13:bigint, col 12:bigint) -> 29:double, LongScalarModuloLongColumn(val -3728, col 32:bigint)(children: LongColAddLongColumn(col 4:bigint, col 34:bigint)(children: col 4:tinyint, LongScalarMultiplyLongColumn(val 762, col 32:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 32:bigint) -> 34:bigint) -> 32:bigint) -> 34:bigint Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -722,7 +773,7 @@ WHERE ((ctimestamp1 = ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2.5109214708345636E18 -2.5109214708345636E18 5.0218429416691272E18 2780 75.198 62 2.5109214708345661E18 2.5109214708345636E18 -1.0 2780 -2780 9460.675803068349 -2.5109214708345636E18 -2118360 1072872630 -2118298 -2.5109214697616911E18 185935.34910862707 0 758 -1.733509234828496 -3728 +2.5109214708344376E18 -2.5109214708344376E18 5.0218429416688753E18 2780 75.198 62 2.5109214708344402E18 2.5109214708344376E18 -1.0 2780 -2780 9460.675803068356 -2.5109214708344376E18 -2118360 1072872630 -2118298 -2.5109214697615652E18 185935.34910862715 0 758 -1.733509234828496 -3728 WARNING: Comparing a bigint and a double may result in a loss of precision. 
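The plan diffs above and below all show the same reduction: var_pop, var_samp, stddev_pop, stddev_samp and avg are no longer evaluated as single struct-producing aggregates. The map side now projects x and x*x (the CastLongToDouble / DoubleColMultiplyDoubleColumn select expressions) and aggregates them with plain sum() and count(); the reducer then reassembles the final value in the Select Operator, e.g. var_pop(x) as ((sum(x*x) - ((sum(x) * sum(x)) / n)) / n), the stddev variants as power(variance, 0.5), the sample variants with the divisor CASE WHEN (n = 1L) THEN (null) ELSE (n - 1) END, and avg(x) as sum(x) / n. Because sum, count, min and max all have vectorized FINAL-mode implementations, the reducers flip from "vectorized: false" (the old notVectorizedReason about struct-typed variance aggregates) to "vectorized: true". A minimal sketch of the reassembly arithmetic, in Java — all names here are illustrative, not Hive's:

  // Sketch of the decomposed variance formulas the rewritten plans emit.
  // Assumes sum = SUM(x), sumSq = SUM(x*x), n = COUNT(x) have already been
  // computed, exactly as the map-side sum()/count() partial aggregates do.
  public class ReducedVarianceSketch {

    static double varPop(double sumSq, double sum, long n) {
      // ((sum(x*x) - ((sum(x) * sum(x)) / n)) / n)
      return (sumSq - (sum * sum) / n) / n;
    }

    static Double varSamp(double sumSq, double sum, long n) {
      if (n == 1) {
        return null;  // CASE WHEN (n = 1L) THEN (null) ELSE (n - 1) END
      }
      return (sumSq - (sum * sum) / n) / (n - 1);
    }

    static double stddevPop(double sumSq, double sum, long n) {
      // the plans spell sqrt as power(v, 0.5) -> FuncPowerDoubleToDouble
      return Math.pow(varPop(sumSq, sum, n), 0.5);
    }

    public static void main(String[] args) {
      double[] xs = {762.0, -5638.15, 2563.58, 79.553};
      double sum = 0, sumSq = 0;
      for (double x : xs) {  // map-side partial aggregation
        sum += x;
        sumSq += x * x;
      }
      long n = xs.length;
      System.out.println("var_pop    = " + varPop(sumSq, sum, n));
      System.out.println("var_samp   = " + varSamp(sumSq, sum, n));
      System.out.println("stddev_pop = " + stddevPop(sumSq, sum, n));
      System.out.println("avg        = " + (sum / n));
    }
  }

This also accounts for the golden-result churn in this file: the decomposed formula accumulates and combines doubles in a different order than the old one-shot variance evaluators, so values agree except in the last few digits (e.g. 8549.657499338187 -> 8549.657499338193, 107.55555555555556 -> 107.55555555555554) — expected floating-point reassociation rather than a semantic change.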
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(ctinyint), @@ -801,33 +852,34 @@ STAGE PLANS: predicate: (((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0D)) or ((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and (cstring1 >= 'ss')) or (cfloat = 17)) (type: boolean) Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cint, cbigint, cfloat + expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 3, 4] + projectedOutputColumnNums: [0, 3, 2, 4, 13, 16, 14, 18] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) + aggregations: sum(_col0), count(_col0), max(_col1), sum(_col5), sum(_col4), count(_col2), sum(_col7), sum(_col6), count(_col1), max(_col3) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 0:tinyint) -> struct, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_pop, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFMaxDouble(col 4:float) -> float + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: float) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -840,24 +892,40 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), max(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), max(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxLong(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDouble(col 9:float) -> float + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), (_col0 + 6981.0D) (type: double), ((_col0 + 6981.0D) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0D) + _col0) / _col0) (type: double), (- (_col0 + 6981.0D)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0D))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5 (type: float), (_col4 * -26.28D) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) + 6981.0D) (type: double), (((_col0 / _col1) + 6981.0D) + (_col0 / _col1)) (type: double), _col2 (type: bigint), ((((_col0 / _col1) + 6981.0D) + (_col0 / _col1)) / (_col0 / _col1)) (type: double), (- ((_col0 / _col1) + 6981.0D)) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 
= 1L)) THEN (null) ELSE ((_col5 - 1)) END), 0.5) (type: double), ((_col0 / _col1) % (- ((_col0 / _col1) + 6981.0D))) (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), ((_col6 - ((_col7 * _col7) / _col8)) / _col8) (type: double), (- _col2) (type: bigint), (UDFToDouble((- _col2)) / power(((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END), 0.5)) (type: double), _col9 (type: float), (((_col6 - ((_col7 * _col7) / _col8)) / _col8) * -26.28D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 12, 14, 2, 13, 11, 15, 21, 20, 22, 19, 25, 9, 16] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 10:double, DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double, DoubleColAddDoubleColumn(col 13:double, col 11:double)(children: DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 14:double, DoubleColDivideDoubleColumn(col 15:double, col 11:double)(children: DoubleColAddDoubleColumn(col 13:double, col 11:double)(children: DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 15:double, LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 15:double) -> 11:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 15:double) -> 16:double) -> 15:double, IfExprNullCondExpr(col 17:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 16:double) -> 15:double, DoubleColModuloDoubleColumn(col 16:double, col 20:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 16:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColAddDoubleScalar(col 20:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 20:double) -> 21:double) -> 20:double) -> 21:double, DoubleColDivideLongColumn(col 16:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 20:double)(children: DoubleColDivideLongColumn(col 16:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 16:double) -> 20:double) -> 16:double) -> 20:double, DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 22:double)(children: DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 16:double) -> 22:double) -> 16:double) -> 22:double, LongColUnaryMinus(col 2:bigint) -> 
19:bigint, DoubleColDivideDoubleColumn(col 16:double, col 24:double)(children: CastLongToDouble(col 23:bigint)(children: LongColUnaryMinus(col 2:bigint) -> 23:bigint) -> 16:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 24:double) -> 25:double) -> 24:double, IfExprNullCondExpr(col 23:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 24:double) -> 25:double, DoubleColMultiplyDoubleScalar(col 24:double, val -26.28)(children: DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 24:double)(children: DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 16:double) -> 24:double) -> 16:double) -> 24:double) -> 16:double Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -919,7 +987,7 @@ WHERE (((ctimestamp2 <= ctimestamp1) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222071937E8 -0.5934409161894847 2.5099887741860852E16 1.52140608502098816E18 -2141851355 -13.510823917813237 79.553 -3.998255191435157E19 +-0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222070777E8 -0.5934409161894847 2.5099887741857176E16 1.52140608502098611E18 -2141851355 -13.510823917814225 79.553 -3.998255191435152E19 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, @@ -2118,27 +2186,28 @@ STAGE PLANS: predicate: (((UDFToInteger(csmallint) = -6432) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint))) and (UDFToInteger(csmallint) >= -257)) (type: boolean) Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint) - outputColumnNames: ctinyint, csmallint, cbigint + expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 3] + projectedOutputColumnNums: [1, 3, 0, 13, 16, 14, 18] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 1:smallint) -> 14:double, CastLongToDouble(col 1:smallint) -> 15:double) -> 16:double, CastLongToDouble(col 0:tinyint) -> 14:double, 
DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2503 Data size: 52344 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + aggregations: sum(_col4), sum(_col3), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count() Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:smallint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] - keys: csmallint (type: smallint) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] + keys: _col0 (type: smallint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1141 Data size: 204228 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1141 Data size: 76436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + @@ -2147,8 +2216,8 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1141 Data size: 204228 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) + Statistics: Num rows: 1141 Data size: 76436 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2161,26 +2230,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), 
count(VALUE._col6), count(VALUE._col7) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumLong(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFCountMerge(col 8:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:smallint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: KEY._col0 (type: smallint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1141 Data size: 39924 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1141 Data size: 76436 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010L) (type: bigint) + expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col4 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col4)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), ((_col5 - ((_col6 * _col6) / _col7)) / _col7) (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col8 (type: bigint), (_col8 - -89010L) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 9, 10, 16, 4, 18, 19, 17, 14, 8, 20] + selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 9:int, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 10:double) -> 11:double) -> 10:double, IfExprNullCondExpr(col 12:boolean, null, col 13:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 12:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 13:bigint) -> 14:bigint) -> 11:double) -> 10:double, DecimalScalarDivideDecimalColumn(val -1.389, col 15:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 15:decimal(5,0)) -> 16:decimal(10,9), DoubleColDivideDoubleColumn(col 11:double, col 17:double)(children: CastLongToDouble(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 11:double, CastLongToDouble(col 4:bigint) -> 17:double) -> 18:double, LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 19:int, DoubleColDivideLongColumn(col 
11:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 17:double)(children: DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 11:double) -> 17:double) -> 11:double) -> 17:double, LongColUnaryMinus(col 20:int)(children: LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 20:int) -> 14:int, LongColSubtractLongScalar(col 8:bigint, val -89010) -> 20:bigint Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) sort order: +++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -2282,7 +2369,7 @@ POSTHOOK: Input: default@alltypesorc -42 -42 NULL 0.033071429 NULL NULL 42 0.0 -42 1 89011 -49 -49 NULL 0.028346939 NULL NULL 49 0.0 -49 1 89011 -62 -62 NULL 0.022403226 NULL NULL 62 0.0 -62 1 89011 --75 0 0.0 0.018520000 NULL NULL 0 107.55555555555556 0 3 89013 +-75 0 0.0 0.018520000 NULL NULL 0 107.55555555555554 0 3 89013 -77 -2 NULL 0.018038961 NULL NULL 2 0.0 -2 1 89011 -84 -9 NULL 0.016535714 NULL NULL 9 0.0 -9 1 89011 -89 -14 NULL 0.015606742 NULL NULL 14 0.0 -14 1 89011 @@ -2376,27 +2463,28 @@ STAGE PLANS: predicate: ((((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15))) and (cdouble > 2563.58D)) (type: boolean) Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cfloat (type: float), cdouble (type: double) - outputColumnNames: cfloat, cdouble + expressions: cdouble (type: double), cfloat (type: float), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4, 5] + projectedOutputColumnNums: [5, 4, 14] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 2503 Data size: 59820 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + aggregations: sum(_col2), sum(_col0), count(_col0), count(_col1), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFSumDouble(col 5:double) -> double + aggregators: VectorUDAFSumDouble(col 14:double) -> double, 
VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cdouble (type: double) + projectedOutputColumnNums: [0, 1, 2, 3, 4] + keys: _col0 (type: double) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1136 Data size: 306696 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1136 Data size: 52232 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + @@ -2405,8 +2493,8 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1136 Data size: 306696 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) + Statistics: Num rows: 1136 Data size: 52232 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2419,26 +2507,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3), sum(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1136 Data size: 61320 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1136 Data size: 52232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), _col1 (type: double), (2563.58D * _col1) (type: double), (- 
_col1) (type: double), _col2 (type: bigint), ((2563.58D * _col1) + -5638.15D) (type: double), ((- _col1) * ((2563.58D * _col1) + -5638.15D)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0D) (type: double), _col6 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) + expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 6, 11, 4, 17, 20, 5, 23, 26, 14, 29, 30, 2, 34] + selectExpressions: DoubleColDivideLongColumn(col 6:double, col 10:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 6:double, IfExprNullCondExpr(col 8:boolean, null, col 9:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 9:bigint) -> 10:bigint) -> 7:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 11:double) -> 6:double, IfExprNullCondExpr(col 10:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 10:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 11:double) -> 6:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 11:double) -> 14:double) -> 11:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: 
LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 11:double, DoubleColAddDoubleScalar(col 14:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 17:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 14:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 14:double, col 23:double)(children: DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 20:double) -> 14:double, IfExprNullCondExpr(col 19:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 20:double) -> 14:double, DoubleColAddDoubleScalar(col 20:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 20:double) -> 23:double) -> 20:double, IfExprNullCondExpr(col 22:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 20:double) -> 23:double) -> 20:double, DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 23:double) -> 14:double) -> 23:double, DoubleColSubtractDoubleColumn(col 0:double, col 14:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 26:double) -> 14:double, IfExprNullCondExpr(col 25:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 14:double) -> 26:double, FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 29:double) -> 14:double) -> 29:double) -> 14:double, DoubleColAddDoubleColumn(col 
0:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 29:double, IfExprNullCondExpr(col 28:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 30:double) -> 29:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 30:double, DoubleScalarModuloDoubleColumn(val -863.257, col 33:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 33:double) -> 34:double Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 @@ -2678,27 +2784,28 @@ STAGE PLANS: predicate: ((((UDFToInteger(ctinyint) <> -257) and cboolean2 is not null and cstring1 regexp '.*ss' and (UDFToDouble(ctimestamp1) > -3.0D)) or (UDFToDouble(ctimestamp2) = -5.0D) or ((UDFToDouble(ctimestamp1) < 0.0D) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint)))) and (UDFToDouble(ctimestamp1) <> 0.0D)) (type: boolean) Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble, cstring1, ctimestamp1 + expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4, 5, 6, 8] + projectedOutputColumnNums: [8, 6, 2, 1, 0, 4, 5, 13, 16, 14, 18, 4, 15, 17, 21] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, 
col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 15:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 0:tinyint) -> 19:double, CastLongToDouble(col 0:tinyint) -> 20:double) -> 21:double Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + aggregations: sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), count(), min(_col4), sum(_col10), sum(_col9), sum(_col12), sum(_col11), count(_col5), sum(_col2), sum(_col5), min(_col6), sum(_col14), sum(_col13), count(_col4) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: var_samp, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFSumLong(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - keys: ctimestamp1 (type: timestamp), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + keys: _col0 (type: timestamp), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 6144 Data size: 5199016 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, 
_col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 6144 Data size: 1537192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) sort order: ++ @@ -2707,8 +2814,8 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 5199016 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) + Statistics: Num rows: 6144 Data size: 1537192 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: tinyint), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -2721,26 +2828,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), count(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), min(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), count(VALUE._col17) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMinLong(col 8:tinyint) -> tinyint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint, VectorUDAFSumLong(col 14:bigint) -> bigint, VectorUDAFSumDouble(col 15:double) -> double, 
VectorUDAFMinDouble(col 16:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountMerge(col 19:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:timestamp, col 1:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 3072 Data size: 645716 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 3072 Data size: 768596 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175D) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28D - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28D - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175D)) (type: double), _col6 (type: double), (_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175D / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175D))) (type: double), _col10 (type: double), (((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175D) (type: double), (10.175D % (10.175D / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28D - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28D) (type: double) + expressions: _col0 (type: timestamp), _col1 (type: string), power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) (type: double), (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D) (type: double), (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), (_col5 / _col6) (type: double), (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), (-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), _col7 (type: bigint), (- _col7) (type: bigint), ((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) (type: double), _col8 (type: tinyint), (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 
0.5))) * UDFToDouble((- _col7))) (type: double), (- (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D)) (type: double), ((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) (type: double), (((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) (type: double), (- (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) (type: double), (UDFToDouble((- _col7)) / power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (10.175D / (_col5 / _col6)) (type: double), (_col14 / _col4) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END) (type: double), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) - (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) (type: double), (- (- (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D))) (type: double), (_col15 / _col13) (type: double), (((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) - (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) * 10.175D) (type: double), (10.175D % (10.175D / (_col5 / _col6))) (type: double), (- _col8) (type: tinyint), _col16 (type: double), ((_col9 - ((_col10 * _col10) / _col6)) / _col6) (type: double), (- ((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)))) (type: double), ((- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) % (_col15 / _col13)) (type: double), (-26.28 / CAST( (- _col8) AS decimal(3,0))) (type: decimal(8,6)), power(((_col17 - ((_col18 * _col18) / _col19)) / _col19), 0.5) (type: double), _col14 (type: bigint), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) / ((_col11 - ((_col12 * _col12) / _col13)) / _col13)) (type: double), (- (- _col7)) (type: bigint), _col7 (type: bigint), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) % -26.28D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, 
_col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 20, 22, 23, 21, 25, 26, 7, 27, 24, 8, 30, 28, 32, 29, 35, 40, 39, 41, 38, 43, 46, 49, 42, 50, 51, 53, 16, 55, 56, 58, 61, 54, 14, 62, 67, 7, 65] + selectExpressions: FuncPowerDoubleToDouble(col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double) -> 21:double) -> 20:double, DoubleColMultiplyDoubleScalar(col 21:double, val 10.175)(children: FuncPowerDoubleToDouble(col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 21:double) -> 22:double) -> 21:double) -> 22:double) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 21:double)(children: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double, LongColDivideLongColumn(col 5:bigint, col 6:bigint) -> 21:double, DoubleColUnaryMinus(col 24:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 24:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 26:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 26:double) -> 24:double) -> 26:double) -> 24:double) -> 26:double, LongColUnaryMinus(col 7:bigint) -> 27:bigint, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 24:double)(children: FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 28:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 28:double) -> 24:double) -> 28:double) -> 24:double) -> 28:double, DoubleColUnaryMinus(col 24:double)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 29:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 29:double) -> 
24:double) -> 29:double) -> 24:double) -> 29:double) -> 24:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 29:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 28:double)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColUnaryMinus(col 28:double)(children: FuncPowerDoubleToDouble(col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, CastLongToDouble(col 31:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 31:bigint) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 29:double)(children: DoubleColMultiplyDoubleScalar(col 28:double, val 10.175)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double) -> 28:double, DoubleColDivideLongColumn(col 29:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 32:double)(children: DoubleColDivideLongColumn(col 29:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 32:double) -> 29:double, IfExprNullCondExpr(col 31:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 31:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 32:double, DoubleColAddDoubleColumn(col 35:double, col 39:double)(children: DoubleColDivideLongColumn(col 29:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 35:double)(children: DoubleColDivideLongColumn(col 29:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 35:double) -> 29:double, IfExprNullCondExpr(col 34:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 34:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double, DoubleColMultiplyDoubleColumn(col 29:double, col 38:double)(children: DoubleColMultiplyDoubleColumn(col 38:double, col 39:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 29:double)(children: FuncPowerDoubleToDouble(col 38:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 38:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 29:double) -> 38:double) -> 29:double) -> 38:double) -> 29:double) -> 38:double, DoubleColUnaryMinus(col 29:double)(children: FuncPowerDoubleToDouble(col 39:double)(children: 
DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 39:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 29:double) -> 39:double) -> 29:double) -> 39:double) -> 29:double) -> 39:double) -> 29:double, CastLongToDouble(col 37:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 37:bigint) -> 38:double) -> 39:double) -> 29:double, DoubleColUnaryMinus(col 38:double)(children: DoubleColUnaryMinus(col 35:double)(children: FuncPowerDoubleToDouble(col 38:double)(children: DoubleColDivideLongColumn(col 35:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 38:double)(children: DoubleColDivideLongColumn(col 35:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 35:double) -> 38:double) -> 35:double) -> 38:double) -> 35:double) -> 38:double) -> 35:double, DoubleColDivideDoubleColumn(col 38:double, col 39:double)(children: CastLongToDouble(col 37:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 37:bigint) -> 38:double, FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 39:double) -> 40:double) -> 39:double) -> 40:double) -> 39:double) -> 40:double, DoubleColDivideLongColumn(col 38:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 38:double) -> 39:double) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 10.175, col 38:double)(children: LongColDivideLongColumn(col 5:bigint, col 6:bigint) -> 38:double) -> 41:double, LongColDivideLongColumn(col 14:bigint, col 4:bigint) -> 38:double, DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 43:double)(children: DoubleColDivideLongColumn(col 42:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 42:double) -> 43:double) -> 42:double, IfExprNullCondExpr(col 37:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 13:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 13:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 43:double, DoubleColSubtractDoubleColumn(col 42:double, col 50:double)(children: DoubleColAddDoubleColumn(col 46:double, col 50:double)(children: DoubleColDivideLongColumn(col 42:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 42:double) -> 46:double) -> 42:double, IfExprNullCondExpr(col 45:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 45:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 46:double, DoubleColMultiplyDoubleColumn(col 42:double, col 49:double)(children: DoubleColMultiplyDoubleColumn(col 49:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 42:double)(children: FuncPowerDoubleToDouble(col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 
4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double, DoubleColUnaryMinus(col 42:double)(children: FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 50:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 42:double) -> 50:double) -> 42:double) -> 50:double) -> 42:double) -> 50:double) -> 42:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 49:double) -> 50:double) -> 42:double, DoubleColMultiplyDoubleColumn(col 46:double, col 49:double)(children: DoubleColMultiplyDoubleColumn(col 49:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 46:double)(children: FuncPowerDoubleToDouble(col 49:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 49:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 46:double) -> 49:double) -> 46:double) -> 49:double) -> 46:double) -> 49:double, DoubleColUnaryMinus(col 46:double)(children: FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 46:double) -> 50:double) -> 46:double) -> 50:double) -> 46:double) -> 50:double) -> 46:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 49:double) -> 50:double) -> 46:double, DoubleColUnaryMinus(col 42:double)(children: DoubleColUnaryMinus(col 49:double)(children: DoubleColMultiplyDoubleScalar(col 42:double, val 10.175)(children: FuncPowerDoubleToDouble(col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double, DoubleColDivideLongColumn(col 15:double, col 13:bigint) -> 42:double, DoubleColMultiplyDoubleScalar(col 51:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 50:double, col 55:double)(children: DoubleColAddDoubleColumn(col 51:double, col 55:double)(children: DoubleColDivideLongColumn(col 50:double, col 53:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 51:double)(children: DoubleColDivideLongColumn(col 50:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 50:double) -> 51:double) -> 50:double, IfExprNullCondExpr(col 48:boolean, null, col 52:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 48:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 52:bigint) -> 53:bigint) -> 51:double, DoubleColMultiplyDoubleColumn(col 50:double, col 54:double)(children: DoubleColMultiplyDoubleColumn(col 54:double, col 
55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 50:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 54:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 50:double) -> 54:double) -> 50:double) -> 54:double) -> 50:double) -> 54:double, DoubleColUnaryMinus(col 50:double)(children: FuncPowerDoubleToDouble(col 55:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 55:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 50:double) -> 55:double) -> 50:double) -> 55:double) -> 50:double) -> 55:double) -> 50:double, CastLongToDouble(col 53:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 53:bigint) -> 54:double) -> 55:double) -> 50:double, DoubleColMultiplyDoubleColumn(col 51:double, col 54:double)(children: DoubleColMultiplyDoubleColumn(col 54:double, col 55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 51:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 54:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 51:double) -> 54:double) -> 51:double) -> 54:double) -> 51:double) -> 54:double, DoubleColUnaryMinus(col 51:double)(children: FuncPowerDoubleToDouble(col 55:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 55:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 51:double) -> 55:double) -> 51:double) -> 55:double) -> 51:double) -> 55:double) -> 51:double, CastLongToDouble(col 53:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 53:bigint) -> 54:double) -> 55:double) -> 51:double) -> 50:double, DoubleScalarModuloDoubleColumn(val 10.175, col 54:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 51:double)(children: LongColDivideLongColumn(col 5:bigint, col 6:bigint) -> 51:double) -> 54:double) -> 51:double, LongColUnaryMinus(col 8:tinyint) -> 53:tinyint, DoubleColDivideLongColumn(col 54:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 55:double)(children: DoubleColDivideLongColumn(col 54:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 54:double) -> 55:double) -> 54:double) -> 55:double, DoubleColUnaryMinus(col 54:double)(children: DoubleColMultiplyDoubleColumn(col 56:double, col 57:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 54:double)(children: FuncPowerDoubleToDouble(col 56:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 56:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 54:double) -> 56:double) -> 54:double) -> 56:double) -> 54:double) -> 56:double, DoubleColUnaryMinus(col 54:double)(children: FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 
54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double) -> 56:double, DoubleColModuloDoubleColumn(col 57:double, col 54:double)(children: DoubleColUnaryMinus(col 54:double)(children: FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double, DoubleColDivideLongColumn(col 15:double, col 13:bigint) -> 54:double) -> 58:double, DecimalScalarDivideDecimalColumn(val -26.28, col 60:decimal(3,0))(children: CastLongToDecimal(col 59:tinyint)(children: LongColUnaryMinus(col 8:tinyint) -> 59:tinyint) -> 60:decimal(3,0)) -> 61:decimal(8,6), FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 17:double, col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 19:bigint)(children: DoubleColMultiplyDoubleColumn(col 18:double, col 18:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double, DoubleColDivideDoubleColumn(col 57:double, col 65:double)(children: DoubleColAddDoubleColumn(col 62:double, col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 64:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 62:double)(children: DoubleColDivideLongColumn(col 57:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 62:double) -> 57:double, IfExprNullCondExpr(col 59:boolean, null, col 63:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 59:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 63:bigint) -> 64:bigint) -> 62:double, DoubleColMultiplyDoubleColumn(col 57:double, col 65:double)(children: DoubleColMultiplyDoubleColumn(col 65:double, col 66:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 57:double)(children: FuncPowerDoubleToDouble(col 65:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 65:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 65:double) -> 57:double) -> 65:double) -> 57:double) -> 65:double, DoubleColUnaryMinus(col 57:double)(children: FuncPowerDoubleToDouble(col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double) -> 57:double, CastLongToDouble(col 64:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 64:bigint) -> 65:double) -> 66:double) -> 57:double, DoubleColDivideLongColumn(col 62:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 65:double)(children: DoubleColDivideLongColumn(col 62:double, col 13:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 62:double) -> 65:double) -> 62:double) -> 65:double) -> 62:double, LongColUnaryMinus(col 64:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 64:bigint) -> 67:bigint, DoubleColModuloDoubleScalar(col 57:double, val -26.28)(children: DoubleColAddDoubleColumn(col 65:double, col 70:double)(children: DoubleColDivideLongColumn(col 57:double, col 69:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 65:double)(children: DoubleColDivideLongColumn(col 57:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 65:double) -> 57:double, IfExprNullCondExpr(col 64:boolean, null, col 68:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 64:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 68:bigint) -> 69:bigint) -> 65:double, DoubleColMultiplyDoubleColumn(col 57:double, col 66:double)(children: DoubleColMultiplyDoubleColumn(col 66:double, col 70:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 57:double)(children: FuncPowerDoubleToDouble(col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double, DoubleColUnaryMinus(col 57:double)(children: FuncPowerDoubleToDouble(col 70:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 70:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 70:double) -> 57:double) -> 70:double) -> 57:double) -> 70:double) -> 57:double, CastLongToDouble(col 69:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 69:bigint) -> 66:double) -> 70:double) -> 57:double) -> 65:double Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) sort order: +++++++++++++++++++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3072 Data size: 1542740 Basic 
stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -3061,27 +3186,28 @@ STAGE PLANS: predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 7153 Data size: 1514550 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cboolean1 (type: boolean) - outputColumnNames: ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cboolean1 + expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 10] + projectedOutputColumnNums: [10, 4, 3, 2, 5, 0, 1, 13, 17, 15, 19, 16, 21, 18, 23] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 2:int) -> 15:double, CastLongToDouble(col 2:int) -> 16:double) -> 17:double, CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 18:double, col 20:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 20:double) -> 21:double, CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 20:double, col 22:double)(children: CastLongToDouble(col 1:smallint) -> 20:double, CastLongToDouble(col 1:smallint) -> 22:double) -> 23:double Statistics: Num rows: 7153 Data size: 1514550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + aggregations: max(_col1), sum(_col2), sum(_col8), sum(_col7), count(_col3), sum(_col4), count(_col4), min(_col2), sum(_col10), sum(_col9), count(_col2), sum(_col3), sum(_col12), sum(_col11), count(_col5), sum(_col14), sum(_col13), count(_col6) Group By Vectorization: - aggregators: VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarLong(col 
0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_pop, VectorUDAFAvgLong(col 2:int) -> struct + aggregators: VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - keys: cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + keys: _col0 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 3 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 3 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -3090,8 +3216,8 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + Statistics: Num rows: 3 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: double), _col17 (type: double), _col18 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -3104,26 +3230,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator 
GenericUDAFVarianceSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), count(VALUE._col10), sum(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), count(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), count(VALUE._col17) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 1:float) -> float, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMinLong(col 8:bigint) -> bigint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFSumLong(col 12:bigint) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCountMerge(col 15:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFCountMerge(col 18:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: KEY._col0 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 3 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 + Statistics: Num rows: 3 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28D / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553D / _col6) (type: double), (_col3 % (79.553D / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double) + expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28D / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: 
decimal(23,3)), ((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END) % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), (_col6 / _col7) (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + ((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END)) (type: double), _col8 (type: bigint), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (- (10.175 + (- _col1))) (type: float), (79.553D / ((_col9 - ((_col10 * _col10) / _col11)) / _col11)) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END) % (79.553D / ((_col9 - ((_col10 * _col10) / _col11)) / _col11))) (type: double), _col12 (type: bigint), power(((_col13 - ((_col14 * _col14) / _col15)) / CASE WHEN ((_col15 = 1L)) THEN (null) ELSE ((_col15 - 1)) END), 0.5) (type: double), (-1.389 * CAST( _col8 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col12 AS decimal(19,0)) - (-1.389 * CAST( _col8 AS decimal(19,0)))) (type: decimal(25,3)), power(((_col16 - ((_col17 * _col17) / _col18)) / _col18), 0.5) (type: double), (- (CAST( _col12 AS decimal(19,0)) - (-1.389 * CAST( _col8 AS decimal(19,0))))) (type: decimal(25,3)), (UDFToDouble(_col12) / _col5) (type: double), (- (UDFToDouble(_col12) / _col5)) (type: double), ((UDFToDouble(_col12) / _col5) * UDFToDouble(_col12)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 19, 20, 2, 22, 24, 23, 31, 28, 34, 8, 35, 33, 38, 43, 12, 39, 46, 49, 40, 52, 53, 50, 56] + selectExpressions: DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleScalarDivideDoubleColumn(val -26.28, col 1:double)(children: col 1:float) -> 20:double, DecimalColSubtractDecimalScalar(col 21:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 21:decimal(19,0)) -> 22:decimal(23,3), DoubleColDivideLongColumn(col 23:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 24:double)(children: DoubleColDivideLongColumn(col 23:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 23:double) -> 24:double) -> 23:double, IfExprNullCondExpr(col 25:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 24:double, DoubleColModuloDoubleColumn(col 28:double, col 1:double)(children: DoubleColDivideLongColumn(col 23:double, col 30:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 28:double)(children: DoubleColDivideLongColumn(col 23:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 23:double) -> 28:double) -> 23:double, IfExprNullCondExpr(col 27:boolean, null, col 29:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 29:bigint) -> 30:bigint) -> 28:double, col 1:float) -> 23:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 28:float)(children: DoubleColUnaryMinus(col 1:float) -> 28:float) -> 31:float, 
DoubleColDivideLongColumn(col 6:double, col 7:bigint) -> 28:double, DoubleColAddDoubleColumn(col 33:double, col 35:double)(children: CastDecimalToDouble(col 32:decimal(23,3))(children: DecimalColSubtractDecimalScalar(col 21:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 21:decimal(19,0)) -> 32:decimal(23,3)) -> 33:double, DoubleColDivideLongColumn(col 34:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 35:double)(children: DoubleColDivideLongColumn(col 34:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 34:double) -> 35:double) -> 34:double, IfExprNullCondExpr(col 30:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double) -> 34:double, DoubleColDivideLongColumn(col 33:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 35:double)(children: DoubleColDivideLongColumn(col 33:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 33:double) -> 35:double) -> 33:double) -> 35:double, DoubleColUnaryMinus(col 38:float)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 33:float)(children: DoubleColUnaryMinus(col 1:float) -> 33:float) -> 38:float) -> 33:float, DoubleScalarDivideDoubleColumn(val 79.553, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 38:double) -> 39:double) -> 38:double) -> 39:double) -> 38:double, DoubleColModuloDoubleColumn(col 40:double, col 39:double)(children: DoubleColDivideLongColumn(col 39:double, col 42:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 39:double) -> 40:double) -> 39:double, IfExprNullCondExpr(col 37:boolean, null, col 41:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 41:bigint) -> 42:bigint) -> 40:double, DoubleScalarDivideDoubleColumn(val 79.553, col 43:double)(children: DoubleColDivideLongColumn(col 39:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 43:double)(children: DoubleColDivideLongColumn(col 39:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 39:double) -> 43:double) -> 39:double) -> 43:double) -> 39:double) -> 43:double, FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 13:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 15:bigint)(children: DoubleColMultiplyDoubleColumn(col 14:double, col 14:double) -> 39:double) -> 40:double) -> 39:double, IfExprNullCondExpr(col 42:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 15:bigint, val 1) -> 42:boolean, LongColSubtractLongScalar(col 15:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 40:double) -> 39:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 21:decimal(19,0))(children: CastLongToDecimal(col 8:bigint) -> 21:decimal(19,0)) -> 46:decimal(24,3), DecimalColSubtractDecimalColumn(col 
21:decimal(19,0), col 48:decimal(24,3))(children: CastLongToDecimal(col 12:bigint) -> 21:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 47:decimal(19,0))(children: CastLongToDecimal(col 8:bigint) -> 47:decimal(19,0)) -> 48:decimal(24,3)) -> 49:decimal(25,3), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 40:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 16:double, col 50:double)(children: DoubleColDivideLongColumn(col 40:double, col 18:bigint)(children: DoubleColMultiplyDoubleColumn(col 17:double, col 17:double) -> 40:double) -> 50:double) -> 40:double) -> 50:double) -> 40:double, FuncNegateDecimalToDecimal(col 51:decimal(25,3))(children: DecimalColSubtractDecimalColumn(col 21:decimal(19,0), col 48:decimal(24,3))(children: CastLongToDecimal(col 12:bigint) -> 21:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 47:decimal(19,0))(children: CastLongToDecimal(col 8:bigint) -> 47:decimal(19,0)) -> 48:decimal(24,3)) -> 51:decimal(25,3)) -> 52:decimal(25,3), DoubleColDivideLongColumn(col 50:double, col 5:bigint)(children: CastLongToDouble(col 12:bigint) -> 50:double) -> 53:double, DoubleColUnaryMinus(col 54:double)(children: DoubleColDivideLongColumn(col 50:double, col 5:bigint)(children: CastLongToDouble(col 12:bigint) -> 50:double) -> 54:double) -> 50:double, DoubleColMultiplyDoubleColumn(col 55:double, col 54:double)(children: DoubleColDivideLongColumn(col 54:double, col 5:bigint)(children: CastLongToDouble(col 12:bigint) -> 54:double) -> 55:double, CastLongToDouble(col 12:bigint) -> 54:double) -> 56:double Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 3 Data size: 1800 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 @@ -3244,8 +3388,8 @@ ORDER BY cboolean1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -false 11.0 -11.0 -2.389090909090909 -17881597706 -17881597716.175 3.8953387713327066E17 6.0 -0.8249998 -2454.8879999999995 3.8953385925167296E17 -2145884705 1.66288903197104486E18 0.8249998 4.7840233756130287E-17 4.098424268084119E-17 0.8249998 -1051696618 28.692556844886422 2980633855.245 -4032330473.245 85.79562278396777 4032330473.245 -3983699.3106060605 3983699.3106060605 4.1896430920933255E15 -true 79.553 -79.553 -0.33034580136836733 -401322621137 -401322621147.175 7.9255373737244976E16 34.727455139160156 -69.378 4856.6352637899645 7.9254972414623824E16 -2130544867 2.30133924842409523E18 69.378 
3.456813247089758E-17 2.0387240975807185E-18 69.378 2182477964777 34.654968050508266 2959326820.263 2179518637956.737 9461.197516216069 -2179518637956.737 4.592756659884259E8 -4.592756659884259E8 1.002359020778021E21 +false 11.0 -11.0 -2.389090909090909 -17881597706 -17881597716.175 3.8953387713327046E17 1.0 -0.8249998 -2454.8879999999995 3.8953385925167277E17 -2145884705 1.66288903197104486E18 0.8249998 4.7840233756130287E-17 3.8687857663039107E-17 0.8249998 -1051696618 28.692556844886425 2980633855.245 -4032330473.245 85.79562278396776 4032330473.245 -3983699.3106060605 3983699.3106060605 4.1896430920933255E15 +true 79.553 -79.553 -0.33034580136836733 -401322621137 -401322621147.175 7.9255373737242976E16 23.552490234375 -69.378 4856.6352637899645 7.9254972414621824E16 -2130544867 2.30133924842409984E18 69.378 3.456813247089751E-17 5.788274192367441E-19 69.378 2182477964777 34.65496805050828 2959326820.263 2179518637956.737 9461.197516216063 -2179518637956.737 4.592756659884259E8 -4.592756659884259E8 1.002359020778021E21 PREHOOK: query: create table test_count(i int) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 0dff57afa6..8abd2348a3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -69,25 +69,25 @@ STAGE PLANS: selectExpressions: LongColAddLongColumn(col 2:int, col 2:int) -> 13:int Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFAvgLong(col 13:int) -> struct + aggregators: VectorUDAFCount(col 2:int) -> bigint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFSumLong(col 13:int) -> bigint, VectorUDAFCount(col 13:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -151,27 
+151,36 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct) -> double + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), (_col3 / _col4) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: LongColDivideLongColumn(col 3:bigint, col 4:bigint) -> 5:double Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index e26f92b6f8..fed8f91d17 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -143,21 +143,21 @@ STAGE PLANS: alias: alltypes_parquet Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(cint), min(csmallint), 
count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) - Statistics: Num rows: 128 Data size: 22404 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -170,26 +170,31 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 128 Data size: 7556 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 128 Data size: 4484 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch 
Operator @@ -219,20 +224,20 @@ POSTHOOK: query: select ctinyint, POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_parquet #### A masked pattern was here #### --59 626923679 -15789 28 -59.0 9829.790704244733 --58 626923679 -15169 20 -58.0 9549.096672008198 --54 626923679 -14815 23 -54.0 9614.154026896626 +-59 626923679 -15789 28 -59.0 9829.790704244735 +-58 626923679 -15169 20 -58.0 9549.096672008196 +-54 626923679 -14815 23 -54.0 9614.154026896624 -50 626923679 -14320 27 -50.0 8548.827748002343 -41 626923679 -12606 21 -41.0 9034.40949481481 -38 626923679 -14914 28 -38.0 8767.375358291503 -37 626923679 -14780 17 -37.0 10368.905538788269 --36 626923679 -16208 23 -36.0 8773.547684436919 +-36 626923679 -16208 23 -36.0 8773.54768443692 -34 626923679 -15450 29 -34.0 8708.243526705026 --26 626923679 -15686 15 -26.0 10874.523900405318 +-26 626923679 -15686 15 -26.0 10874.52390040532 -24 626923679 -16311 26 -24.0 9386.736402961187 -22 626923679 -14701 22 -22.0 8809.230165774987 -18 626923679 -14863 24 -18.0 9638.430684071413 --12 626923679 -16373 22 -12.0 10173.15707541171 +-12 626923679 -16373 22 -12.0 10173.157075411711 -10 626923679 -15384 28 -10.0 8850.451610567823 -9 626923679 -15329 31 -9.0 8999.391457373968 -2 626923679 -16277 20 -2.0 10800.090249507177 @@ -241,67 +246,67 @@ POSTHOOK: Input: default@alltypes_parquet 19 626923679 -16049 21 19.0 9423.560227007669 20 626923679 -15149 21 20.0 11161.893298093504 21 626923679 -15931 23 21.0 9683.044864861204 -26 626923679 -14516 29 26.0 9123.125508880432 -27 626923679 -14965 24 27.0 9802.871860196345 -30 626923679 -14111 27 30.0 10066.520234676527 -34 626923679 -15059 28 34.0 8756.731536033676 +26 626923679 -14516 29 26.0 9123.125508880434 +27 626923679 -14965 24 27.0 9802.871860196343 +30 626923679 -14111 27 30.0 10066.520234676529 +34 626923679 -15059 28 34.0 8756.731536033674 36 626923679 -15912 20 36.0 9475.257975138164 38 626923679 -15248 29 38.0 9900.256257785535 40 626923679 -15861 22 40.0 9283.318678549174 44 626923679 -11185 16 44.0 9425.076634933797 -45 626923679 -15228 18 45.0 9459.968668643689 +45 626923679 -15228 18 45.0 9459.968668643687 49 626923679 -15923 27 49.0 9850.111848934683 53 626923679 -16217 30 53.0 9895.247408969733 58 626923679 -13627 20 58.0 9083.529665947459 NULL 1073418988 -16379 3115 NULL 305051.4870777435 -60 626923679 -15792 24 -60.0 9892.656196775464 --56 626923679 -11999 33 -56.0 9490.842152672341 +-56 626923679 -11999 33 -56.0 9490.84215267234 -49 626923679 -14831 23 -49.0 9894.429191738676 -46 626923679 -12427 21 -46.0 9182.943188188632 -45 626923679 -15027 21 -45.0 8567.489593562543 -42 626923679 -16025 14 -42.0 9692.646755759979 --32 626923679 -15866 25 -32.0 9535.546396775915 +-32 626923679 -15866 25 -32.0 9535.546396775917 -28 626923679 -15813 20 -28.0 9616.869413270924 -27 626923679 -14984 20 -27.0 8465.29660255097 -21 626923679 -16017 27 -21.0 9480.349236669877 --20 626923679 -16126 24 -20.0 9868.92268080106 +-20 626923679 -16126 24 -20.0 9868.922680801063 -19 626923679 -15935 25 -19.0 9967.22240685782 -17 626923679 -15922 19 -17.0 9944.104273894172 -14 626923679 -13884 22 -14.0 10125.818731386042 --8 626923679 -14678 18 -8.0 9976.831992670684 +-8 626923679 -14678 18 -8.0 9976.831992670686 -6 626923679 -15980 30 -6.0 10262.829252317424 0 626923679 -14254 24 0.0 10057.5018088718 -9 626923679 -13629 25 9.0 10157.217948808622 -10 626923679 -15887 26 10.0 9104.820520135108 +9 626923679 -13629 25 9.0 10157.21794880862 +10 626923679 -15887 26 10.0 9104.82052013511 28 626923679 -14455 20 28.0 
9283.289383115296 37 626923679 -12081 24 37.0 9017.860034890362 -41 626923679 -13480 21 41.0 9016.291129937847 +41 626923679 -13480 21 41.0 9016.291129937848 48 626923679 -16372 29 48.0 10079.286173063345 52 626923679 -15450 20 52.0 9261.723648435052 -59 626923679 -16076 17 59.0 10117.44967077967 +59 626923679 -16076 17 59.0 10117.449670779672 -64 626923679 -15920 21 -64.0 9254.456539277186 -63 626923679 -12516 16 -63.0 9263.605837223322 --62 626923679 -15992 24 -62.0 9004.593091474135 --61 626923679 -15142 22 -61.0 9357.236187870849 +-62 626923679 -15992 24 -62.0 9004.593091474137 +-61 626923679 -15142 22 -61.0 9357.23618787085 -55 626923679 -13381 26 -55.0 9157.562103946742 --53 626923679 -15445 19 -53.0 9387.739325499799 +-53 626923679 -15445 19 -53.0 9387.7393254998 -52 626923679 -16369 30 -52.0 8625.06871423408 -51 1073680599 -15734 1028 -51.0 9531.569305177045 --29 626923679 -14747 26 -29.0 9052.945656011721 --15 626923679 -16036 24 -15.0 9450.506254395024 --13 626923679 -15446 30 -13.0 8907.942987576693 --7 626923679 -14584 23 -7.0 9946.605446407746 +-29 626923679 -14747 26 -29.0 9052.945656011723 +-15 626923679 -16036 24 -15.0 9450.506254395026 +-13 626923679 -15446 30 -13.0 8907.942987576691 +-7 626923679 -14584 23 -7.0 9946.605446407748 -5 626923679 -15780 24 -5.0 10599.227726422314 -4 626923679 -16207 21 -4.0 9682.726604102581 -3 626923679 -13632 16 -3.0 8836.215573422822 --1 626923679 -15441 36 -1.0486250072717667 8786.246963933321 -4 626923679 -15999 29 4.0 9516.189702058042 -6 626923679 -15948 30 6.0 9644.247255286113 -16 626923679 -14001 26 16.0 10130.883606275334 +-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323 +4 626923679 -15999 29 4.0 9516.189702058044 +6 626923679 -15948 30 6.0 9644.247255286115 +16 626923679 -14001 26 16.0 10130.883606275338 18 626923679 -15779 21 18.0 10820.004053788869 31 626923679 -15960 24 31.0 10427.970184550613 -33 626923679 -14642 29 40.61776386607777 1304429.5939037625 -39 626923679 -14887 28 39.0 10513.343644635232 +33 626923679 -14642 29 40.61776386607777 1304429.593903763 +39 626923679 -14887 28 39.0 10513.343644635233 43 626923679 -15703 28 43.0 8757.796089055722 46 626923679 -15187 22 46.0 9685.908173160062 47 626923679 -16324 22 47.0 9822.220821743611 @@ -309,8 +314,8 @@ NULL 1073418988 -16379 3115 NULL 305051.4870777435 56 626923679 -12631 21 56.0 8860.917133763547 57 626923679 -15620 25 57.0 9413.99393840875 60 626923679 -13606 23 60.0 8346.267436552042 --57 626923679 -14893 32 -57.0 8572.083461570477 --48 626923679 -15462 26 -48.0 9913.883371354861 +-57 626923679 -14893 32 -57.0 8572.083461570479 +-48 626923679 -15462 26 -48.0 9913.883371354863 -47 626923679 -16096 19 -47.0 9011.009178780589 -44 626923679 -15667 21 -44.0 10334.01810499552 -43 626923679 -15607 27 -43.0 8715.255026265124 @@ -319,21 +324,21 @@ NULL 1073418988 -16379 3115 NULL 305051.4870777435 -35 626923679 -16059 23 -35.0 10136.580492864763 -33 626923679 -12779 21 -33.0 8854.331159704514 -31 626923679 -15915 22 -31.0 9187.596784112568 --30 626923679 -14863 23 -30.0 9193.941914019653 --25 626923679 -15862 24 -25.0 9778.256724727018 +-30 626923679 -14863 23 -30.0 9193.941914019651 +-25 626923679 -15862 24 -25.0 9778.25672472702 -23 626923679 -16355 36 -23.345263230173213 9401.831290253447 -16 626923679 -15154 21 -16.0 8884.207393686478 --11 626923679 -15659 32 -11.0 10453.738567408038 -1 626923679 -14610 30 1.0 10016.486277900643 +-11 626923679 -15659 32 -11.0 10453.73856740804 +1 626923679 -14610 30 1.0 10016.486277900645 2 626923679 -16227 25 2.0 
10083.276127543355 -3 626923679 -16339 30 3.0 10483.526375885149 +3 626923679 -16339 30 3.0 10483.526375885147 5 626923679 -16169 31 5.0 11114.001902469323 7 626923679 -15839 25 7.0 10077.151640330823 8 1070764888 -15778 1034 8.0 9562.355155774725 11 1072654057 -14696 1035 11.0 9531.018991371746 12 626923679 -14642 18 12.0 9696.038286378725 13 626923679 -14771 26 13.0 8128.265919972384 -15 626923679 -16339 28 15.0 9770.473400901916 +15 626923679 -16339 28 15.0 9770.473400901918 22 626923679 -16280 26 22.0 9693.155720861765 23 626923679 -15514 24 23.0 8542.419116415425 24 626923679 -15086 24 24.0 9661.203790645088 @@ -344,9 +349,9 @@ NULL 1073418988 -16379 3115 NULL 305051.4870777435 42 626923679 -15834 28 42.0 10318.01399719996 51 626923679 -15790 17 51.0 9220.075799194028 54 626923679 -15245 16 54.0 9789.50878424882 -55 626923679 -15887 21 55.0 9826.38569192808 -61 626923679 -15894 29 61.0 8785.714950987198 -62 626923679 -14307 17 62.0 9491.752726667326 +55 626923679 -15887 21 55.0 9826.385691928082 +61 626923679 -15894 29 61.0 8785.7149509872 +62 626923679 -14307 17 62.0 9491.752726667324 PREHOOK: query: CREATE TABLE empty_parquet(x int) PARTITIONED BY (y int) stored as parquet PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index 92c6a734d8..acb9126aa7 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -282,27 +282,28 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cdecimal (type: decimal(4,2)) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1, cdecimal + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), cdecimal (type: decimal(4,2)), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 10] + projectedOutputColumnNums: [1, 0, 2, 5, 3, 4, 10, 12] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 12:double Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col7), sum(_col5), count(_col5), max(_col6) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCount(col 5:string) -> bigint, VectorUDAFAvgDouble(col 3:float) -> struct, VectorUDAFVarDouble(col 4:double) -> struct aggregation: stddev_pop, VectorUDAFMaxDecimal(col 10:decimal(4,2)) -> decimal(4,2) + aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCount(col 5:string) -> bigint, VectorUDAFSumDouble(col 3:float) -> double, VectorUDAFCount(col 3:float) -> bigint, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 
4:double) -> bigint, VectorUDAFMaxDecimal(col 10:decimal(4,2)) -> decimal(4,2) className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:tinyint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: ctinyint (type: tinyint) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -311,8 +312,8 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) + Statistics: Num rows: 3 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(4,2)) Execution mode: vectorized, llap LLAP IO: all inputs (cache only) Map Vectorization: @@ -325,24 +326,46 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDecimal(col 9:decimal(4,2)) -> decimal(4,2) + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:tinyint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE 
Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: tinyint) - sort order: + + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 3 Data size: 516 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double), _col9 (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 10, 11, 9] + selectExpressions: DoubleColDivideLongColumn(col 4:double, col 5:bigint) -> 10:double, FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 11:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 12:double)(children: DoubleColDivideLongColumn(col 11:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 11:double) -> 12:double) -> 11:double) -> 12:double) -> 11:double Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -404,7 +427,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_types #### A masked pattern was here #### 1 121 1 8 1.1749999970197678 2.0621590627301285 90.33 -2 119 1 7 1.2142857142857142 1.8 60.12 +2 119 1 7 1.2142857142857142 1.8000000000000003 60.12 3 120 1 7 1.171428578240531 1.7999999999999996 90.21 PREHOOK: query: create table parquet_type_nodict like parquet_types stored as parquet tblproperties ("parquet.enable.dictionary"="false") diff --git a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 72216d3383..498f9b95bf 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -124,14 +124,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -142,26 +142,35 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct) -> double + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), (_col3 / _col4) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: LongColDivideLongColumn(col 3:bigint, col 4:bigint) -> 5:double Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 3fb968fd4b..46a24702ec 100644 --- 
a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -365,17 +365,17 @@ STAGE PLANS: projectedOutputColumnNums: [0] Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(ts) + aggregations: sum(ts), count(ts) Group By Vectorization: - aggregators: VectorUDAFAvgTimestamp(col 0:timestamp) -> struct + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -383,9 +383,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + valueColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -414,31 +414,31 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: VALUE._col0:struct + dataColumnCount: 2 + dataColumns: VALUE._col0:double, VALUE._col1:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 0:struct) -> double + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFCountMerge(col 1:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: double), CAST( _col0 AS TIMESTAMP) (type: timestamp) + expressions: (_col0 / _col1) (type: double), CAST( (_col0 / _col1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - selectExpressions: CastDoubleToTimestamp(col 0:double) -> 1:timestamp + projectedOutputColumnNums: [2, 4] + selectExpressions: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 2:double, CastDoubleToTimestamp(col 3:double)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 3:double) -> 4:timestamp Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -497,25 
+497,26 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:ts:timestamp, 1:ROW__ID:struct] Select Operator - expressions: ts (type: timestamp) - outputColumnNames: ts + expressions: ts (type: timestamp), UDFToDouble(ts) (type: double), (UDFToDouble(ts) * UDFToDouble(ts)) (type: double) + outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 2, 5] + selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 2:double, DoubleColMultiplyDoubleColumn(col 3:double, col 4:double)(children: CastTimestampToDouble(col 0:timestamp) -> 3:double, CastTimestampToDouble(col 0:timestamp) -> 4:double) -> 5:double Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts) + aggregations: sum(_col2), sum(_col1), count(_col0) Group By Vectorization: - aggregators: VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: variance, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_samp, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: std, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_samp + aggregators: VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCount(col 0:timestamp) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + projectedOutputColumnNums: [0, 1, 2] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -523,9 +524,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6] - Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + valueColumnNums: [0, 1, 2] + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -542,27 +543,54 @@ STAGE PLANS: includeColumns: [0] dataColumns: ts:timestamp partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - 
notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: variance(VALUE._col0), var_pop(VALUE._col1), var_samp(VALUE._col2), std(VALUE._col3), stddev(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), ((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / _col2), 0.5) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / _col2), 0.5) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / _col2), 0.5) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4, 5, 6, 3, 10, 11, 12] + selectExpressions: DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 4:double)(children: DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 3:double) -> 4:double) -> 3:double) -> 4:double, DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 5:double)(children: DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 3:double) -> 5:double) -> 3:double) -> 5:double, DoubleColDivideLongColumn(col 3:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 6:double)(children: DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 3:double) -> 6:double) -> 3:double, IfExprNullCondExpr(col 7:boolean, null, col 8:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 7:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 8:bigint) -> 9:bigint) -> 6:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 
0:double, col 10:double)(children: DoubleColDivideLongColumn(col 3:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 3:double) -> 10:double) -> 3:double) -> 10:double) -> 3:double, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 11:double) -> 10:double) -> 11:double) -> 10:double, FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 11:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 12:double)(children: DoubleColDivideLongColumn(col 11:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 11:double) -> 12:double) -> 11:double) -> 12:double) -> 11:double, FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 13:double)(children: DoubleColDivideLongColumn(col 12:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 12:double) -> 13:double) -> 12:double, IfExprNullCondExpr(col 9:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 13:double) -> 12:double Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index c88ba5d67d..c9dd434967 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -1063,33 +1063,34 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 + expressions: ctimestamp1 (type: timestamp), UDFToDouble(ctimestamp1) (type: double), (UDFToDouble(ctimestamp1) * UDFToDouble(ctimestamp1)) (type: double) + outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 3, 6] + selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(ctimestamp1), 
variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgTimestamp(col 0:timestamp) -> struct, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: variance, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_samp, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: std, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_samp + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] + projectedOutputColumnNums: [0, 1, 2, 3] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1102,24 +1103,40 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: true + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) 
-> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), _col2 BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), _col3 BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) + expressions: round((_col0 / _col1), 0) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D (type: boolean), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5), 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 7, 8, 11, 6, 12, 13, 14] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 
10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out index 9f7339515a..474182b8a2 100644 --- a/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out +++ b/ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out @@ -465,31 +465,31 @@ STAGE PLANS: alias: parquet_types Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double) + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round((_col4 / _col5), 5) (type: double), round(power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5), 5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out b/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out index 54b1742055..43321ab915 100644 --- a/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out +++ b/ql/src/test/results/clientpositive/parquet_types_vectorization.q.out @@ -185,31 +185,31 @@ STAGE PLANS: alias: parquet_types Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), min(csmallint), 
count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double) + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round((_col4 / _col5), 5) (type: double), round(power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5), 5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index 4b3502fd22..fbb78b10c0 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -372,18 +372,18 @@ STAGE PLANS: alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint + expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column 
stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -400,16 +400,20 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -418,8 +422,8 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -436,12 +440,12 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 
(type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -481,7 +485,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +-4.344925324321378 1158.3003004768175 1158.3003004768175 1158.426587033782 34.03381113652741 34.03381113652741 34.03381113652741 34.03566639620535 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), @@ -856,18 +860,18 @@ STAGE PLANS: alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint + expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -884,16 +888,20 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -902,8 +910,8 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -920,12 +928,12 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -965,7 +973,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 +-1.8515862077935246E8 2.07689300543066035E18 2.07689300543066035E18 2.07711944383072922E18 1.441142951074133E9 1.441142951074133E9 1.441142951074133E9 1.4412215110213728E9 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), @@ -1340,18 +1348,18 @@ STAGE PLANS: 
alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat + expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -1368,16 +1376,20 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -1386,8 +1398,8 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 636 
Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -1404,12 +1416,12 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1449,7 +1461,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +-4.303895780321011 1163.8972588605056 1163.8972588605056 1164.0241556397098 34.11593848717203 34.11593848717203 34.11593848717203 34.11779822379677 WARNING: Comparing a bigint and a double may result in a loss of precision. 
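(Note, not part of the patch: every hunk above follows the same rewrite pattern. The old plans carried a single variance-family aggregate as a `struct` partial — e.g. `var_pop(ctinyint)` with `VectorUDAFVarLong` — while the new plans ship only `sum(x)`, `count(x)` and `sum(x*x)` partials and recombine them in the final Select Operator, e.g. `((_col2 - ((_col3 * _col3) / _col1)) / _col1)` for var_pop and `power(..., 0.5)` for stddev. The sketch below restates that recombination arithmetic in plain Java so the rewritten expressions are easier to read; the class and method names are illustrative only and do not exist in the patch.)

```java
/**
 * A minimal sketch of the arithmetic evaluated by the rewritten Select
 * Operators above, assuming the three partials sum(x), count(x) and
 * sum(x*x) have already been aggregated. Illustrative names only.
 */
public final class VarianceDecompositionSketch {

  /** avg(x) rewritten as (_col0 / _col1), i.e. sum / count. */
  static double avg(double sum, long count) {
    return sum / count;
  }

  /** var_pop(x) rewritten as ((sumSq - ((sum * sum) / count)) / count). */
  static double varPop(double sumSq, double sum, long count) {
    return (sumSq - (sum * sum) / count) / count;
  }

  /**
   * var_samp(x): same numerator, divided by (count - 1). The plans guard
   * the single-row case with CASE WHEN ((_col1 = 1L)) THEN (null),
   * modeled here by returning a null Double.
   */
  static Double varSamp(double sumSq, double sum, long count) {
    if (count == 1L) {
      return null; // mirrors CASE WHEN ((_col1 = 1L)) THEN (null)
    }
    return (sumSq - (sum * sum) / count) / (count - 1);
  }

  /** stddev_pop(x) rewritten as power(var_pop(x), 0.5). */
  static double stddevPop(double sumSq, double sum, long count) {
    return Math.pow(varPop(sumSq, sum, count), 0.5);
  }
}
```

The small last-digit drifts in the updated expected results (e.g. `1158.3003004768184` becoming `1158.3003004768175`) are consistent with evaluating the same statistic through this decomposed formula, which accumulates and divides in a different floating-point order than the old single-pass evaluators; the golden outputs were regenerated accordingly.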
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), @@ -1531,25 +1543,26 @@ STAGE PLANS: predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cfloat + expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 4] + projectedOutputColumnNums: [3, 4, 0, 14, 17] + selectExpressions: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 16:double) -> 17:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -1557,8 +1570,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: 
vectorized Map Vectorization: enabled: true @@ -1574,24 +1587,24 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double] + scratchColumnTypeNames: [decimal(13,3), double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), min(VALUE._col6) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0D + _col0) (type: double), _col1 (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) + (-6432.0D + _col0)) (type: double), _col2 (type: double), (- (-6432.0D + _col0)) (type: double), (-6432.0D + (- (-6432.0D + _col0))) (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) / (- (-6432.0D + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0D + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + expressions: (_col0 / _col1) (type: double), (- (_col0 / _col1)) (type: double), (-6432.0D + (_col0 / _col1)) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) + (-6432.0D + (_col0 / _col1))) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), (-6432.0D + (- (-6432.0D + (_col0 / _col1)))) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) / (- (-6432.0D + (_col0 / _col1)))) (type: double), _col4 (type: bigint), _col5 (type: double), (((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) % power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5)) (type: double), (- ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), ((- (-6432.0D + (_col0 / _col1))) * (- (_col0 / _col1))) (type: double), _col6 (type: tinyint), (- _col6) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1660,7 +1673,7 @@ WHERE (((cstring2 LIKE '%b%') POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593508E9 3.875716535945533E8 0.0 2.06347151720190515E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.051665108770714E9 -2.06347151720190515E18 1.5020929380914048E17 -64 64 PREHOOK: query: EXPLAIN extended select count(*) from alltypesparquet where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out index 42cbf3542d..afada382b5 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_1.q.out @@ -66,25 +66,26 @@ STAGE PLANS: predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, cint, cfloat, cdouble + expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5] + projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, 
VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -92,8 +93,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -109,24 +110,24 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), max(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), count(VALUE._col9) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 / -26.28D) (type: double), _col1 (type: double), (-1.389D + _col1) (type: double), (_col1 * (-1.389D + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389D + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175D % (- (_col1 * (-1.389D + _col1)))) (type: double), _col5 
(type: bigint), (-563 % _col3) (type: int) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) / -26.28D) (type: double), _col3 (type: double), (-1.389D + _col3) (type: double), (_col3 * (-1.389D + _col3)) (type: double), _col4 (type: tinyint), (- (_col3 * (-1.389D + _col3))) (type: double), _col5 (type: int), (CAST( _col5 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), ((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END) (type: double), (10.175D % (- (_col3 * (-1.389D + _col3)))) (type: double), _col9 (type: bigint), (-563 % _col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,4 +183,4 @@ WHERE (((cdouble > ctinyint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -1074.830257547229 -40.89917266161449 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620903E10 10.175 3745 -563 +1074.8302575472321 -40.899172661614614 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620917E10 10.175 3745 -563 diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out index da3d4ad512..c284977db7 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out @@ -89,26 +89,27 @@ STAGE PLANS: predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean) Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) - outputColumnNames: cbigint, cdouble, cstring1, cboolean1 + expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 5, 6, 10] + projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE Group By Operator 
- aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) + aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: - aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop + aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4] - keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) @@ -120,7 +121,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) + value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -136,20 +137,20 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double) + expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out index bd6f5819a1..6dd6e3f2dd 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out @@ -91,26 +91,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > 11.0D) and (UDFToDouble(ctimestamp2) <> 12.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + 
selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -122,7 +123,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -138,20 +139,20 @@ STAGE PLANS: includeColumns: [0, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, 
cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(11,4)] + scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -420,26 +421,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > -1.388D) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat 
(type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -451,7 +453,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
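
Every hunk in these .q.out updates follows the same pattern: a variance-family aggregate (stddev_pop, stddev_samp, var_pop, var_samp) is split on the map side into sum(x), sum(x*x) and count(x) partials, and the reduce-side Select Operator reassembles the final value arithmetically. The identities being applied are the usual one-pass variance formulas:

\[
\mathrm{var\_pop}(x) = \frac{\sum x_i^2 - \bigl(\sum x_i\bigr)^2/n}{n},
\qquad
\mathrm{stddev\_pop}(x) = \mathrm{var\_pop}(x)^{1/2}
\]
\[
\mathrm{var\_samp}(x) = \frac{\sum x_i^2 - \bigl(\sum x_i\bigr)^2/n}{n-1}
\quad (\text{NULL when } n = 1,\ \text{hence the CASE WHEN guards in the hunks below})
\]
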
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -468,13 +470,13 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index 
91162ace81..c501fab2e8 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -91,27 +91,27 @@ STAGE PLANS: predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 4, 6, 10, 5, 14] - selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double + projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] + selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 14:double) -> struct aggregation: stddev_samp, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 14:double) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) 
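
For reference, a minimal runnable Java sketch of the same reassembly that the rewritten projections perform over the shipped partials (class and method names are illustrative only, not Hive API):

    public final class VarianceDecomposition {

      /** var_pop(x) = (sum(x*x) - sum(x)*sum(x)/n) / n */
      static double varPop(double sumSquares, double sum, long n) {
        return (sumSquares - (sum * sum) / n) / n;
      }

      /** var_samp divides by (n - 1) and is NULL over a single row; NULL is
       *  modeled as NaN here, where the plans emit CASE WHEN n = 1 THEN null. */
      static double varSamp(double sumSquares, double sum, long n) {
        return n == 1 ? Double.NaN : (sumSquares - (sum * sum) / n) / (n - 1);
      }

      /** stddev_* = power(var_*, 0.5), exactly as in the rewritten plans. */
      static double stddevPop(double sumSquares, double sum, long n) {
        return Math.pow(varPop(sumSquares, sum, n), 0.5);
      }

      public static void main(String[] args) {
        double sum = 0, sumSquares = 0;
        long n = 0;
        for (double x : new double[] {1.0, 2.0, 4.0}) {
          sum += x;            // map side: sum(x)
          sumSquares += x * x; // map side: sum(x * x), the projected square
          n++;                 // map side: count(x)
        }
        System.out.println(stddevPop(sumSquares, sum, n)); // ~1.2472
      }
    }

This is exactly the shape of the power(((_colA - ((_colB * _colB) / _colC)) / _colC), 0.5) expressions (column names placeholders) that replace the old struct-typed stddev partials in the value expressions.
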
mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) @@ -123,7 +123,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) + value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -139,20 +139,20 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, double] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175D) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / 
_col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index 2c2ac85606..39057d61e4 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -87,26 +87,27 @@ STAGE PLANS: predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5, 6, 8, 10] + projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: 
stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) @@ -118,7 +119,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) + value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -134,19 +135,19 @@ STAGE PLANS: includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + 
scratchColumnTypeNames: [double, double, double, double, double, double, double] Reduce Vectorization: enabled: false enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) + expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out index 398443b5f0..cf06c91650 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out @@ -64,38 +64,39 @@ STAGE PLANS: 
predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -111,20 +112,20 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] 
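
The widened scratchColumnTypeNames lists in these files are a direct consequence of the rewrite: the extra double scratch columns hold the casted inputs and their squares produced by the CastLongToDouble and DoubleColMultiplyDoubleColumn selectExpressions. And because the aggregation state is now plain double/bigint columns rather than struct-typed UDAF partials, every stage runs on the primitive vectorized aggregators (VectorUDAFSumDouble, VectorUDAFCount) in place of VectorUDAFVarDouble/VectorUDAFVarLong.
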
Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out index 6f35ea0505..131797dc51 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out @@ -70,25 +70,26 @@ STAGE PLANS: predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble + expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator 
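
The parquet_vectorization_2.q.out hunks here also show the companion rewrite for avg: avg(x) becomes a sum(x)/count(x) pair, projected as (_col0 / _col1) on the reduce side. A one-line sketch of that projection (names illustrative, not Hive API):

    // avg over an integral column: the sum is accumulated as bigint,
    // and the final projection divides it by the count as a double.
    static double avg(long sum, long count) {
      return (double) sum / count;
    }

Note also that the POSTHOOK result rows change in their low-order digits (for example 1.49936299222378778E18 becomes 1.49936299222378906E18 here, with similar last-digit drift in parquet_vectorization_3.q.out and parquet_vectorization_4.q.out): reformulating var_pop as (sum(x*x) - sum(x)^2/n)/n associates the double arithmetic differently, which presumably accounts for the small round-off changes in the expected output.
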
native: true - projectedOutputColumnNums: [0, 1, 3, 4, 5] + projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -96,8 +97,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -113,24 +114,24 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr 
IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 % -563.0D) (type: double), (_col0 + 762.0D) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0D) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) % -563.0D) (type: double), ((_col0 / _col1) + 762.0D) (type: double), _col2 (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), (- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) (type: double), (_col2 - (_col0 / _col1)) (type: double), _col6 (type: bigint), (- (_col2 - (_col0 / _col1))) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / _col5) - 762.0D) (type: double), _col7 (type: tinyint), ((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) (type: double), (_col8 / _col9) (type: double), (((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) - _col2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,4 +191,4 @@ WHERE (((ctimestamp1 < ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378778E18 -1.49936299222378778E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378701E18 -64 -1.49936299222378778E18 -5650.1297631138395 -1.49936299222378496E18 +-5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378906E18 -1.49936299222378906E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378829E18 -64 -1.49936299222378906E18 -5650.1297631138395 -1.49936299222378624E18 diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out index 5df9c540e7..f98dea69ad 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out @@ -75,25 +75,26 @@ 
STAGE PLANS: predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, csmallint, cint, cfloat + expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4] + projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 19:double) -> 
double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -101,8 +102,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -118,24 +119,24 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)] + scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3), double, double, double, double, double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 - 10.175D) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175D)) (type: double), (- _col1) (type: double), (_col0 % 79.553D) (type: double), (- (_col0 * (_col0 - 10.175D))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * 
(_col0 - 10.175D))) / (_col0 - 10.175D)) (type: double), (- (_col0 - 10.175D)) (type: double), _col4 (type: double), (-3728.0D - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double) + expressions: power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5)) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) % 79.553D) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5) (type: double), (- power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), _col9 (type: double), ((- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) / (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (_col10 / _col11) (type: double), (-3728.0D - power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), power(((_col12 - ((_col13 * _col13) / _col11)) / _col11), 0.5) (type: double), ((_col10 / _col11) / power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -200,4 +201,4 @@ WHERE (((cint <= cfloat) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.34690095515641 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL +0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.3469009551564 -0.0 197.89499950408936 
-0.0 10.175 NULL -3728.0 NULL NULL diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out index c295618389..973e2bd41a 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_4.q.out @@ -70,17 +70,18 @@ STAGE PLANS: predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double) - outputColumnNames: ctinyint, cint, cdouble + expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 5] + projectedOutputColumnNums: [2, 5, 0, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint) + aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -88,7 +89,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -96,8 +97,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -113,24 +114,24 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 5] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, 
cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), min(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2)) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2))) (type: double) + expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (- power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5)) (type: double), (_col2 / _col3) (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3))) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,4 +191,4 @@ WHERE (((csmallint >= cint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --493101012745 277615870175435 -493101016473 136727.7868296355 -136727.7868296355 2298.5515807767374 0 0.0 1.8694487691330246E10 -0.0 -278108971191908 -64 -64 0.0 +-493101012745 277615870175435 -493101016473 136727.78682963562 -136727.78682963562 2298.5515807767374 0 0.0 1.8694487691330276E10 -0.0 -278108971191908 -64 -64 0.0 diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out index 398443b5f0..cf06c91650 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out @@ 
-64,38 +64,39 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -111,20 +112,20 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + 
scratchColumnTypeNames: [double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out index b07cbba5b0..59db9d16a2 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_limit.q.out @@ -222,18 +222,18 @@ STAGE PLANS: selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgDouble(col 13:double) -> struct + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:tinyint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: 
NONE Reduce Output Operator key expressions: _col0 (type: tinyint) @@ -246,7 +246,7 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -269,21 +269,25 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_not.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_not.q.out index e581007c80..e8fa9dd9b7 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_not.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_not.q.out @@ -55,4 +55,4 @@ WHERE (((cstring2 LIKE '%b%') POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593508E9 3.875716535945533E8 0.0 2.06347151720190515E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.051665108770714E9 -2.06347151720190515E18 1.5020929380914048E17 -64 64 diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out index 79024e3d08..b29ca9a223 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_pushdown.q.out @@ -27,14 +27,14 @@ STAGE PLANS: outputColumnNames: cbigint Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint) 
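
The pushdown hunk here is the canonical shape of the rewrite seen throughout these golden files: the map-side avg struct partial is replaced by reducible sum/count partials, and a trailing Select Operator projects the quotient. As a hedged HiveQL sketch of that same transformation (the table t and its use here are illustrative stand-ins; the actual test aggregates alltypesparquet.cbigint):

-- query shape before the rule fires:
SELECT avg(cbigint) FROM t;

-- shape of the plan the rule produces, expressed back as SQL:
SELECT sum(cbigint) / count(cbigint) FROM t;
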
+ aggregations: sum(cbigint), count(cbigint) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -51,17 +51,21 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query1.q.out b/ql/src/test/results/clientpositive/perf/spark/query1.q.out index 58a833ba52..c15a6f6738 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query1.q.out @@ -210,13 +210,13 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 31675133 Data size: 2454207210 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: int) mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: int) + expressions: ((_col1 / _col2) * 1.2) (type: decimal(38,11)), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -224,7 +224,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 15837566 Data size: 1227103566 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean) + value expressions: _col0 (type: decimal(38,11)), _col1 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator diff --git 
a/ql/src/test/results/clientpositive/perf/spark/query13.q.out b/ql/src/test/results/clientpositive/perf/spark/query13.q.out index f4996dd0dd..92d9370d5e 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query13.q.out @@ -308,28 +308,32 @@ STAGE PLANS: outputColumnNames: _col6, _col8, _col9 Statistics: Num rows: 715776 Data size: 63145968 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col6), avg(_col8), avg(_col9), sum(_col9) + aggregations: sum(_col6), count(_col6), sum(_col8), count(_col8), sum(_col9), count(_col9) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: decimal(17,2)) + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(17,2)), _col3 (type: bigint), _col4 (type: decimal(17,2)), _col5 (type: bigint) Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 764 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), (_col2 / _col3) (type: decimal(37,22)), (_col4 / _col5) (type: decimal(37,22)), CAST( _col4 AS decimal(17,2)) (type: decimal(17,2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query17.q.out b/ql/src/test/results/clientpositive/perf/spark/query17.q.out index 7b12a39114..4a7fd454c2 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query17.q.out @@ -353,29 +353,33 @@ STAGE PLANS: input vertices: 1 Map 16 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col5), avg(_col5), stddev_samp(_col5), count(_col21), avg(_col21), 
stddev_samp(_col21), count(_col14), avg(_col14), stddev_samp(_col14) - keys: _col9 (type: string), _col10 (type: string), _col25 (type: string) - mode: hash + Select Operator + expressions: _col9 (type: string), _col10 (type: string), _col25 (type: string), _col5 (type: int), _col21 (type: int), _col14 (type: int), UDFToDouble(_col5) (type: double), (UDFToDouble(_col5) * UDFToDouble(_col5)) (type: double), UDFToDouble(_col21) (type: double), (UDFToDouble(_col21) * UDFToDouble(_col21)) (type: double), UDFToDouble(_col14) (type: double), (UDFToDouble(_col14) * UDFToDouble(_col14)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Group By Operator + aggregations: count(_col3), sum(_col3), sum(_col7), sum(_col6), count(_col4), sum(_col4), sum(_col9), sum(_col8), count(_col5), sum(_col5), sum(_col11), sum(_col10) + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: bigint), _col10 (type: struct), _col11 (type: struct) + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), avg(VALUE._col1), stddev_samp(VALUE._col2), count(VALUE._col3), avg(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7), stddev_samp(VALUE._col8) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), (_col5 / _col4) (type: double), _col6 (type: 
bigint), _col7 (type: double), _col8 (type: double), (_col8 / _col7) (type: double), _col9 (type: bigint), _col10 (type: double), (_col11 / _col10) (type: double) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint), (_col4 / _col3) (type: double), power(((_col5 - ((_col6 * _col6) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (power(((_col5 - ((_col6 * _col6) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) / (_col4 / _col3)) (type: double), _col7 (type: bigint), (_col8 / _col7) (type: double), power(((_col9 - ((_col10 * _col10) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (power(((_col9 - ((_col10 * _col10) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) / (_col8 / _col7)) (type: double), _col11 (type: bigint), (_col12 / _col11) (type: double), (power(((_col13 - ((_col14 * _col14) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END), 0.5) / (_col12 / _col11)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query18.q.out b/ql/src/test/results/clientpositive/perf/spark/query18.q.out index 0da17dadda..cb3c114fcf 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query18.q.out @@ -305,28 +305,28 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col4), avg(_col5), avg(_col6), avg(_col7), avg(_col8), avg(_col9), avg(_col10) + aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col6), count(_col6), sum(_col7), count(_col7), sum(_col8), count(_col8), sum(_col9), count(_col9), sum(_col10), count(_col10) keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 2108229765 Data size: 285496662075 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct) + value expressions: _col5 (type: decimal(22,2)), _col6 (type: bigint), _col7 (type: decimal(22,2)), _col8 (type: bigint), _col9 (type: decimal(22,2)), _col10 (type: bigint), _col11 (type: decimal(22,2)), _col12 (type: bigint), _col13 (type: decimal(22,2)), _col14 (type: 
bigint), _col15 (type: decimal(22,2)), _col16 (type: bigint), _col17 (type: decimal(22,2)), _col18 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3), avg(VALUE._col4), avg(VALUE._col5), avg(VALUE._col6) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), count(VALUE._col13) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)), _col11 (type: decimal(16,6)) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), (_col5 / _col6) (type: decimal(38,18)), (_col7 / _col8) (type: decimal(38,18)), (_col9 / _col10) (type: decimal(38,18)), (_col11 / _col12) (type: decimal(38,18)), (_col13 / _col14) (type: decimal(38,18)), (_col15 / _col16) (type: decimal(38,18)), (_col17 / _col18) (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -334,11 +334,11 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col4 (type: decimal(16,6)), _col5 (type: decimal(16,6)), _col6 (type: decimal(16,6)), _col7 (type: decimal(16,6)), _col8 (type: decimal(16,6)), _col9 (type: decimal(16,6)), _col10 (type: decimal(16,6)) + value expressions: _col4 (type: decimal(38,18)), _col5 (type: decimal(38,18)), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)), _col8 (type: decimal(38,18)), _col9 (type: decimal(38,18)), _col10 (type: decimal(38,18)) Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(16,6)), VALUE._col1 (type: decimal(16,6)), VALUE._col2 (type: decimal(16,6)), VALUE._col3 (type: decimal(16,6)), VALUE._col4 (type: decimal(16,6)), VALUE._col5 (type: decimal(16,6)), VALUE._col6 (type: decimal(16,6)) + expressions: KEY.reducesinkkey3 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(38,18)), VALUE._col2 (type: decimal(38,18)), VALUE._col3 (type: decimal(38,18)), VALUE._col4 (type: decimal(38,18)), VALUE._col5 (type: decimal(38,18)), VALUE._col6 (type: decimal(38,18)) 
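
The query17 and query18 hunks above spell out the post-aggregation reconstruction for the variance family and for decimal avg. The plan expressions encode the standard sum-of-squares identity over sum/count partials; as a hedged HiveQL sketch over a hypothetical table t(x), mirroring the power(..., 0.5) and CASE WHEN count = 1 expressions visible in the plans:

-- stddev_samp(x) reconstructed from sum(x*x), sum(x), count(x):
SELECT power(
         (sum(x * x) - (sum(x) * sum(x)) / count(x))
         / CASE WHEN count(x) = 1 THEN null ELSE count(x) - 1 END,
         0.5)
FROM t;

-- var_pop(x) shares the numerator but divides by count(x) and skips the square root:
SELECT (sum(x * x) - (sum(x) * sum(x)) / count(x)) / count(x) FROM t;
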
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1054114882 Data size: 142748330969 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/perf/spark/query22.q.out b/ql/src/test/results/clientpositive/perf/spark/query22.q.out index 0353312009..1837397a0b 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query22.q.out @@ -164,28 +164,28 @@ STAGE PLANS: outputColumnNames: _col3, _col8, _col9, _col10, _col11 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col3) + aggregations: sum(_col3), count(_col3) keys: _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) sort order: +++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) Statistics: Num rows: 250121525 Data size: 3951879695 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct) + value expressions: _col5 (type: bigint), _col6 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col5, _col6 Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: double) + expressions: _col3 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), (_col5 / _col6) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 125060762 Data size: 1975939839 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query24.q.out b/ql/src/test/results/clientpositive/perf/spark/query24.q.out index 54c607a890..1f291c0471 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query24.q.out @@ -323,27 +323,27 @@ STAGE PLANS: outputColumnNames: _col10 Statistics: Num rows: 463823414 Data size: 40918636268 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col10) + aggregations: sum(_col10), count(_col10) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + 
Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(27,2)), _col1 (type: bigint) Reducer 18 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (0.05 * _col0) (type: decimal(24,8)) + expressions: (0.05 * (_col0 / _col1)) (type: decimal(38,12)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 @@ -586,17 +586,17 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 input vertices: 1 Reducer 18 - Statistics: Num rows: 231911707 Data size: 113455912641 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 231911707 Data size: 74494745865 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (_col3 > _col4) (type: boolean) - Statistics: Num rows: 77303902 Data size: 37818637383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: decimal(27,2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 77303902 Data size: 37818637383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 77303902 Data size: 37818637383 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 77303902 Data size: 24831581847 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/spark/query26.q.out b/ql/src/test/results/clientpositive/perf/spark/query26.q.out index b0f64e1e8f..e9e54e4516 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query26.q.out @@ -202,10 +202,10 @@ STAGE PLANS: outputColumnNames: _col4, _col5, _col6, _col7, _col18 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6) + aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) keys: _col18 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -213,25 +213,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 421645953 Data size: 57099332415 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 
(type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), (_col7 / _col8) (type: decimal(37,22)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: decimal(37,22)), _col3 (type: decimal(37,22)), _col4 (type: decimal(37,22)) Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(37,22)), VALUE._col2 (type: decimal(37,22)), VALUE._col3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 210822976 Data size: 28549666139 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/perf/spark/query27.q.out b/ql/src/test/results/clientpositive/perf/spark/query27.q.out index 052f25ea68..7d9c85036d 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query27.q.out @@ -210,10 +210,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2), avg(_col3), avg(_col4), avg(_col5) + aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4), sum(_col5), count(_col5) keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2529945843 Data size: 223192556868 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) @@ -221,17 +221,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 2529945843 Data size: 223192556868 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint), _col9 (type: decimal(17,2)), _col10 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: bigint), _col3 (type: double), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)), _col6 (type: decimal(11,6)) + expressions: _col0 (type: string), _col1 (type: string), grouping(_col2, 0) (type: bigint), (_col3 / _col4) (type: double), (_col5 / _col6) (type: decimal(37,22)), (_col7 / _col8) (type: decimal(37,22)), (_col9 / _col10) (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -239,11 +239,11 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: decimal(11,6)), _col5 (type: decimal(11,6)), _col6 (type: decimal(11,6)) + value expressions: _col2 (type: bigint), _col3 (type: double), _col4 (type: decimal(37,22)), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)), VALUE._col4 (type: decimal(11,6)) + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: double), VALUE._col2 (type: decimal(37,22)), VALUE._col3 (type: decimal(37,22)), VALUE._col4 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1264972921 Data size: 111596278389 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/perf/spark/query28.q.out b/ql/src/test/results/clientpositive/perf/spark/query28.q.out index fb7e19856b..a389faa146 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query28.q.out @@ -137,7 +137,7 @@ STAGE PLANS: outputColumnNames: ss_list_price Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_list_price), 
count(ss_list_price) + aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -147,7 +147,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Map 13 Map Operator Tree: TableScan @@ -161,7 +161,7 @@ STAGE PLANS: outputColumnNames: ss_list_price Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_list_price), count(ss_list_price) + aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -171,7 +171,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Map 16 Map Operator Tree: TableScan @@ -185,7 +185,7 @@ STAGE PLANS: outputColumnNames: ss_list_price Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_list_price), count(ss_list_price) + aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -195,7 +195,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -209,7 +209,7 @@ STAGE PLANS: outputColumnNames: ss_list_price Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_list_price), count(ss_list_price) + aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -219,7 +219,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Map 7 Map Operator Tree: TableScan @@ -233,7 +233,7 @@ STAGE PLANS: outputColumnNames: ss_list_price Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_list_price), count(ss_list_price) + aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -243,177 +243,197 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Reducer 11 Reduce Operator Tree: Group By 
Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), count(_col2), count(_col0) + aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint), _col2 (type: bigint) Reducer 12 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 - 3 - 4 - 5 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + 2 + 3 + 4 + 5 Reducer 14 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), count(_col2), count(_col0) + aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint), _col2 (type: bigint) Reducer 15 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 - 3 - 4 - 5 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + 2 + 3 + 4 + 5 Reducer 17 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), count(_col2), count(_col0) + aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint), _col2 (type: bigint) Reducer 18 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 - 3 - 4 - 5 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + 2 + 3 + 4 + 5 Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), count(_col2), count(_col0) + aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint), _col2 (type: bigint) Reducer 6 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 
- 2 - 3 - 4 - 5 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + 2 + 3 + 4 + 5 Reducer 8 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), count(_col2), count(_col0) + aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint), _col2 (type: bigint) Reducer 9 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 - 2 - 3 - 4 - 5 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + 2 + 3 + 4 + 5 Stage: Stage-1 Spark @@ -435,7 +455,7 @@ STAGE PLANS: outputColumnNames: ss_list_price Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_list_price), count(ss_list_price) + aggregations: sum(ss_list_price), count(ss_list_price) keys: ss_list_price (type: decimal(7,2)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -445,69 +465,73 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: decimal(7,2)) mode: partial2 outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 21333171 Data size: 1882018537 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1), count(_col2), count(_col0) + aggregations: sum(_col1), count(_col2), count(_col0) mode: partial2 outputColumnNames: _col0, _col1, _col2 - 
Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), count(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - Inner Join 0 to 3 - Inner Join 0 to 4 - Inner Join 0 to 5 - keys: - 0 - 1 - 2 - 3 - 4 - 5 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - input vertices: - 1 Reducer 6 - 2 Reducer 9 - 3 Reducer 12 - 4 Reducer 15 - 5 Reducer 18 - Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: decimal(11,6)), _col1 (type: bigint), _col2 (type: bigint), _col15 (type: decimal(11,6)), _col16 (type: bigint), _col17 (type: bigint), _col12 (type: decimal(11,6)), _col13 (type: bigint), _col14 (type: bigint), _col9 (type: decimal(11,6)), _col10 (type: bigint), _col11 (type: bigint), _col6 (type: decimal(11,6)), _col7 (type: bigint), _col8 (type: bigint), _col3 (type: decimal(11,6)), _col4 (type: bigint), _col5 (type: bigint) + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + Inner Join 0 to 3 + Inner Join 0 to 4 + Inner Join 0 to 5 + keys: + 0 + 1 + 2 + 3 + 4 + 5 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 3505 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input vertices: + 1 Reducer 6 + 2 Reducer 9 + 3 Reducer 12 + 4 Reducer 15 + 5 Reducer 18 + Statistics: Num rows: 1 Data size: 1393 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(37,22)), _col1 (type: bigint), _col2 (type: bigint), _col15 (type: decimal(37,22)), _col16 (type: bigint), _col17 (type: bigint), _col12 (type: decimal(37,22)), _col13 (type: bigint), _col14 (type: bigint), _col9 (type: decimal(37,22)), _col10 (type: bigint), _col11 
(type: bigint), _col6 (type: decimal(37,22)), _col7 (type: bigint), _col8 (type: bigint), _col3 (type: decimal(37,22)), _col4 (type: bigint), _col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 1393 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 1393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1393 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query30.q.out b/ql/src/test/results/clientpositive/perf/spark/query30.q.out index 7a0c78dbdc..6385984653 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query30.q.out @@ -293,13 +293,13 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string) mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: string) + expressions: ((_col1 / _col2) * 1.2) (type: decimal(38,11)), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -307,7 +307,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean) + value expressions: _col0 (type: decimal(38,11)), _col1 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query32.q.out b/ql/src/test/results/clientpositive/perf/spark/query32.q.out index 6f614e611f..eed3a4d05c 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query32.q.out @@ -167,17 +167,17 @@ STAGE PLANS: 1 Map 8 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 316788826 Data size: 42899570777 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Local Work: Map Reduce Local Work Map 9 @@ -244,13 +244,13 @@ STAGE PLANS: Reducer 6 Reduce 
Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (1.3 * _col1) (type: decimal(14,7)), _col0 (type: int) + expressions: (1.3 * (_col1 / _col2)) (type: decimal(38,21)), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -258,7 +258,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 158394413 Data size: 21449785388 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(14,7)) + value expressions: _col0 (type: decimal(38,21)) Reducer 7 Reduce Operator Tree: Join Operator @@ -274,7 +274,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 174233858 Data size: 23594764438 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(14,7)) + value expressions: _col0 (type: decimal(38,21)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query35.q.out b/ql/src/test/results/clientpositive/perf/spark/query35.q.out index 703131566a..de7c29df55 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query35.q.out @@ -441,7 +441,7 @@ STAGE PLANS: outputColumnNames: _col4, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(), avg(_col8), max(_col8), sum(_col8), avg(_col9), max(_col9), sum(_col9), avg(_col10), max(_col10), sum(_col10) + aggregations: count(), sum(_col8), count(_col8), max(_col8), sum(_col9), count(_col9), max(_col9), sum(_col10), count(_col10), max(_col10) keys: _col4 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: int), _col9 (type: int), _col10 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -452,17 +452,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: int) Statistics: Num rows: 2090864244 Data size: 184456650574 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col6 (type: bigint), _col7 (type: struct), _col8 (type: int), _col9 (type: bigint), _col10 (type: struct), _col11 (type: int), _col12 (type: bigint), _col13 (type: struct), _col14 (type: int), _col15 (type: bigint) + value expressions: _col6 (type: bigint), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: int), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: int) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), avg(VALUE._col7), max(VALUE._col8), sum(VALUE._col9) + aggregations: count(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), 
max(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), max(VALUE._col9) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._col4 (type: int), KEY._col5 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col6 (type: bigint), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col4 (type: int), _col10 (type: double), _col11 (type: int), _col12 (type: bigint), _col5 (type: int), _col13 (type: double), _col14 (type: int), _col15 (type: bigint), _col3 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col6 (type: bigint), (_col7 / _col8) (type: double), _col9 (type: int), _col7 (type: bigint), _col4 (type: int), (_col10 / _col11) (type: double), _col12 (type: int), _col10 (type: bigint), _col5 (type: int), (_col13 / _col14) (type: double), _col15 (type: int), _col13 (type: bigint), _col3 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col9, _col10, _col11, _col12, _col14, _col15, _col16, _col17 Statistics: Num rows: 1045432122 Data size: 92228325287 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query39.q.out b/ql/src/test/results/clientpositive/perf/spark/query39.q.out index 51fcc84e25..ee75072730 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query39.q.out @@ -258,35 +258,39 @@ STAGE PLANS: input vertices: 1 Map 16 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col3), avg(_col3) - keys: _col8 (type: int), _col7 (type: int), _col9 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: _col9 (type: string), _col8 (type: int), _col7 (type: int), _col3 (type: int), UDFToDouble(_col3) (type: double), (UDFToDouble(_col3) * UDFToDouble(_col3)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Group By Operator + aggregations: sum(_col3), count(_col3), sum(_col5), sum(_col4) + keys: _col1 (type: int), _col2 (type: int), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double) Reducer 13 Reduce Operator 
Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col4 = 0.0D)) THEN (false) ELSE (((_col3 / _col4) > 1.0D)) END (type: boolean) + predicate: CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END (type: boolean) Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0D)) THEN (null) ELSE ((_col3 / _col4)) END (type: double) + expressions: _col0 (type: int), _col1 (type: int), (_col3 / _col4) (type: double), CASE WHEN (((_col3 / _col4) = 0)) THEN (null) ELSE ((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4))) END (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -333,35 +337,39 @@ STAGE PLANS: input vertices: 1 Map 9 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col3), avg(_col3) - keys: _col8 (type: int), _col7 (type: int), _col9 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: _col9 (type: string), _col8 (type: int), _col7 (type: int), _col3 (type: int), UDFToDouble(_col3) (type: double), (UDFToDouble(_col3) * UDFToDouble(_col3)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Group By Operator + aggregations: sum(_col3), count(_col3), sum(_col5), sum(_col4) + keys: _col1 (type: int), _col2 (type: int), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num 
rows: 50024305 Data size: 790375939 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double)
         Reducer 4 
             Reduce Operator Tree:
               Group By Operator
-                aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1)
+                aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
                 keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                 Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double)
-                  outputColumnNames: _col1, _col2, _col3, _col4
+                  expressions: _col0 (type: int), _col1 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double)
+                  outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6
                   Statistics: Num rows: 25012152 Data size: 395187961 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: CASE WHEN ((_col4 = 0.0D)) THEN (false) ELSE (((_col3 / _col4) > 1.0D)) END (type: boolean)
+                    predicate: CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END (type: boolean)
                     Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0D)) THEN (null) ELSE ((_col3 / _col4)) END (type: double)
+                      expressions: _col0 (type: int), _col1 (type: int), (_col3 / _col4) (type: double), CASE WHEN (((_col3 / _col4) = 0)) THEN (null) ELSE ((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4))) END (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3
                       Statistics: Num rows: 12506076 Data size: 197593980 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
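
All of the golden-file churn in these plans is one and the same transformation, applied by the new HiveAggregateReduceFunctionsRule: avg(x) is decomposed into sum(x) / count(x), and stddev_samp(x), visible in the query39 hunks just above, into power((sum(x*x) - sum(x) * sum(x) / n) / (n = 1 ? null : n - 1), 0.5) with n = count(x). That is why the struct-typed partial avg/stddev buffers in the old plans become plain sum and count value columns, and why the result types widen (for example decimal(11,6) to decimal(37,22)): the final division is now an explicit decimal divide whose type follows the usual decimal division rules. The algebra can be checked in isolation; the following standalone Java sketch (class name, variable names and sample data are illustrative only, not part of the patch) compares the decomposed one-pass form against a naive two-pass reference:

    import java.util.Arrays;

    public class ReducedAggCheck {
      public static void main(String[] args) {
        double[] xs = {4.0, 7.0, 13.0, 16.0};

        // Partial aggregates, as the rewritten map side computes them:
        // sum(x), count(x) and sum(x*x).
        double sum = 0.0, sumSq = 0.0;
        long n = 0;
        for (double x : xs) {
          sum += x;
          sumSq += x * x;
          n++;
        }

        // avg(x) -> sum(x) / count(x)
        double avg = sum / n;

        // stddev_samp(x) -> power((sum(x*x) - sum(x)^2 / n) / (n - 1), 0.5);
        // the n = 1 case is mapped to null in the plans (NaN stands in here).
        double denom = (n == 1L) ? Double.NaN : (n - 1);
        double stddevSamp = Math.pow((sumSq - (sum * sum) / n) / denom, 0.5);

        // Naive two-pass reference values.
        double refAvg = Arrays.stream(xs).average().orElse(Double.NaN);
        double refVar = Arrays.stream(xs)
            .map(x -> (x - refAvg) * (x - refAvg))
            .sum() / (xs.length - 1);

        System.out.printf("avg:         %.6f vs %.6f%n", avg, refAvg);
        System.out.printf("stddev_samp: %.6f vs %.6f%n", stddevSamp, Math.sqrt(refVar));
      }
    }

The n = 1 guard in the sketch mirrors the CASE WHEN ((_col4 = 1L)) THEN (null) branch in the rewritten predicates. One caveat: the sum-of-squares form is algebraically equal to the two-pass definition but can lose precision when the mean is large relative to the variance; that trade-off is inherent to expressing stddev over sum/count partials.
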
diff --git a/ql/src/test/results/clientpositive/perf/spark/query44.q.out b/ql/src/test/results/clientpositive/perf/spark/query44.q.out
index 4c90d24f38..4ca41fba3f 100644
--- a/ql/src/test/results/clientpositive/perf/spark/query44.q.out
+++ b/ql/src/test/results/clientpositive/perf/spark/query44.q.out
@@ -123,17 +123,17 @@ STAGE PLANS:
                     outputColumnNames: _col1
                     Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: avg(_col1)
+                      aggregations: sum(_col1), count(_col1)
                       keys: 410 (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: struct)
+                        value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint)
         Map 12 
             Map Operator Tree:
                 TableScan
@@ -147,17 +147,17 @@
                     outputColumnNames: ss_item_sk, ss_net_profit
                     Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: avg(ss_net_profit)
+                      aggregations: sum(ss_net_profit), count(ss_net_profit)
                       keys: ss_item_sk (type: int)
                       mode: hash
-                      outputColumnNames: _col0, _col1
+                      outputColumnNames: _col0, _col1, _col2
                       Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
                         Statistics: Num rows: 287997817 Data size: 25407250999 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: struct)
+                        value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint)
         Map 14 
             Map Operator Tree:
                 TableScan
@@ -199,31 +199,35 @@
         Reducer 11 
             Reduce Operator Tree:
              Group By Operator
-                aggregations: avg(VALUE._col0)
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col1 (type: decimal(11,6))
+                  expressions: (_col1 / _col2) (type: decimal(37,22))
                   outputColumnNames: _col0
                   Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     sort order: 
                     Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: decimal(11,6))
+                    value expressions: _col0 (type: decimal(37,22))
         Reducer 13 
             Reduce Operator Tree:
              Group By Operator
-                aggregations: avg(VALUE._col0)
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1
+                outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
+                Select Operator
+                  expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22))
+                  outputColumnNames: _col0, _col1
                   Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: int), _col1 (type: decimal(11,6))
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: int), _col1 (type: decimal(37,22))
         Reducer 15 
             Reduce Operator Tree:
              Join Operator
@@ -289,7 +293,7 @@ STAGE PLANS:
                     predicate: (_col3 > (0.9 * _col1)) (type: boolean)
                     Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: 0 (type: int), _col3 (type: decimal(11,6))
+                      key expressions: 0 (type: int), _col3 (type: decimal(37,22))
                       sort order: +-
                       Map-reduce partition columns: 0 (type: int)
                       Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
@@ -314,14 +318,14 @@ STAGE PLANS:
         Reducer 20 
             Reduce Operator Tree:
              Select Operator
-                expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(11,6))
+                expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(37,22))
                outputColumnNames: _col2, _col3
                Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE
                PTF Operator
                  Function definitions:
                      Input definition
                        input alias: ptf_0
-                        output shape: _col2: int, _col3: decimal(11,6)
+                        output shape: _col2: int, _col3: decimal(37,22)
                        type: WINDOWING
                      Windowing table definition
                        input
alias: ptf_1 @@ -354,31 +358,35 @@ STAGE PLANS: Reducer 22 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: decimal(11,6)) + expressions: (_col1 / _col2) (type: decimal(37,22)) outputColumnNames: _col0 Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 71999454 Data size: 6351812727 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(11,6)) + value expressions: _col0 (type: decimal(37,22)) Reducer 24 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(37,22)) + outputColumnNames: _col0, _col1 Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: decimal(11,6)) + Reduce Output Operator + sort order: + Statistics: Num rows: 143998908 Data size: 12703625455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: decimal(37,22)) Reducer 3 Reduce Operator Tree: Join Operator @@ -431,7 +439,7 @@ STAGE PLANS: predicate: (_col3 > (0.9 * _col1)) (type: boolean) Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: 0 (type: int), _col3 (type: decimal(11,6)) + key expressions: 0 (type: int), _col3 (type: decimal(37,22)) sort order: ++ Map-reduce partition columns: 0 (type: int) Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE @@ -440,14 +448,14 @@ STAGE PLANS: Reducer 9 Reduce Operator Tree: Select Operator - expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(11,6)) + expressions: VALUE._col2 (type: int), KEY.reducesinkkey1 (type: decimal(37,22)) outputColumnNames: _col2, _col3 Statistics: Num rows: 3455947584198744 Data size: 640872925954123264 Basic stats: COMPLETE Column stats: NONE PTF Operator Function definitions: Input definition input alias: ptf_0 - output shape: _col2: int, _col3: decimal(11,6) + output shape: _col2: int, _col3: decimal(37,22) type: WINDOWING Windowing table definition input alias: ptf_1 diff --git a/ql/src/test/results/clientpositive/perf/spark/query6.q.out b/ql/src/test/results/clientpositive/perf/spark/query6.q.out index 3e464f1328..954bacba28 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query6.q.out @@ -217,17 +217,17 @@ STAGE PLANS: predicate: i_category is not null (type: boolean) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(i_current_price) + aggregations: sum(i_current_price), count(i_current_price) keys: i_category (type: string) 
mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 462000 Data size: 663560457 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Map 20 Map Operator Tree: TableScan @@ -289,13 +289,13 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: decimal(11,6)), true (type: boolean), _col0 (type: string) + expressions: (_col1 / _col2) (type: decimal(37,22)), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 231000 Data size: 331780228 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -313,7 +313,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 231000 Data size: 333859228 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(11,6)), _col1 (type: boolean) + value expressions: _col0 (type: decimal(37,22)), _col1 (type: boolean) Reducer 16 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query65.q.out b/ql/src/test/results/clientpositive/perf/spark/query65.q.out index 860a9bacc4..3b3baef09f 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query65.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query65.q.out @@ -309,13 +309,13 @@ STAGE PLANS: outputColumnNames: _col1, _col2 Statistics: Num rows: 316797606 Data size: 27947976754 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: int) mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: decimal(21,6)) + expressions: _col0 (type: int), (_col1 / _col2) (type: decimal(38,13)) outputColumnNames: _col0, _col1 Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -323,7 +323,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158398803 Data size: 13973988377 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: decimal(21,6)) + value expressions: _col1 (type: decimal(38,13)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query7.q.out b/ql/src/test/results/clientpositive/perf/spark/query7.q.out index b0979c0d46..71a08b04a7 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query7.q.out @@ -202,10 +202,10 @@ STAGE PLANS: outputColumnNames: _col4, _col5, _col6, _col7, _col18 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Group By 
Operator - aggregations: avg(_col4), avg(_col5), avg(_col7), avg(_col6) + aggregations: sum(_col4), count(_col4), sum(_col5), count(_col5), sum(_col7), count(_col7), sum(_col6), count(_col6) keys: _col18 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -213,25 +213,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 843315281 Data size: 74397518956 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint), _col7 (type: decimal(17,2)), _col8 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), avg(VALUE._col3) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), (_col7 / _col8) (type: decimal(37,22)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), _col4 (type: decimal(11,6)) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: decimal(37,22)), _col3 (type: decimal(37,22)), _col4 (type: decimal(37,22)) Reducer 6 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(11,6)), VALUE._col2 (type: decimal(11,6)), VALUE._col3 (type: decimal(11,6)) + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: decimal(37,22)), VALUE._col2 (type: decimal(37,22)), VALUE._col3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 421657640 Data size: 37198759433 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/perf/spark/query81.q.out b/ql/src/test/results/clientpositive/perf/spark/query81.q.out index be6a5fa3f8..5b2d5b311a 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query81.q.out @@ -294,13 +294,13 @@ STAGE PLANS: outputColumnNames: _col0, _col2 Statistics: Num rows: 
22000000 Data size: 22327357890 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string) mode: complete - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (_col1 * 1.2) (type: decimal(24,7)), true (type: boolean), _col0 (type: string) + expressions: ((_col1 / _col2) * 1.2) (type: decimal(38,11)), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -308,7 +308,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 11000000 Data size: 11163678945 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(24,7)), _col1 (type: boolean) + value expressions: _col0 (type: decimal(38,11)), _col1 (type: boolean) Reducer 2 Reduce Operator Tree: Join Operator diff --git a/ql/src/test/results/clientpositive/perf/spark/query85.q.out b/ql/src/test/results/clientpositive/perf/spark/query85.q.out index b4a4990a52..d60751cfdb 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query85.q.out @@ -440,38 +440,38 @@ STAGE PLANS: outputColumnNames: _col6, _col7, _col12, _col22 Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col12), avg(_col7), avg(_col6) + aggregations: sum(_col12), count(_col12), sum(_col7), count(_col7), sum(_col6), count(_col6) keys: _col22 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2047980 Data size: 788904791 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: decimal(17,2)), _col4 (type: bigint), _col5 (type: decimal(17,2)), _col6 (type: bigint) Reducer 7 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: double), _col2 (type: decimal(11,6)), _col3 (type: decimal(11,6)), substr(_col0, 1, 20) (type: string) + expressions: (_col1 / _col2) (type: double), (_col3 / _col4) (type: decimal(37,22)), (_col5 / _col6) (type: decimal(37,22)), substr(_col0, 1, 20) (type: string) outputColumnNames: _col4, _col5, _col6, _col7 Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: 
decimal(11,6)), _col6 (type: decimal(11,6)) + key expressions: _col7 (type: string), _col4 (type: double), _col5 (type: decimal(37,22)), _col6 (type: decimal(37,22)) sort order: ++++ Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 8 Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(11,6)), KEY.reducesinkkey3 (type: decimal(11,6)) + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: decimal(37,22)), KEY.reducesinkkey3 (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1023990 Data size: 394452395 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/perf/spark/query9.q.out b/ql/src/test/results/clientpositive/perf/spark/query9.q.out index 49c6b7f5f8..610ce03621 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query9.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query9.q.out @@ -150,14 +150,14 @@ STAGE PLANS: outputColumnNames: ss_ext_list_price Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_ext_list_price) + aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 12 Map Operator Tree: TableScan @@ -171,14 +171,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_net_paid_inc_tax) + aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 14 Map Operator Tree: TableScan @@ -211,14 +211,14 @@ STAGE PLANS: outputColumnNames: ss_ext_list_price Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_ext_list_price) + aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 
(type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 18 Map Operator Tree: TableScan @@ -232,14 +232,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_net_paid_inc_tax) + aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 2 Map Operator Tree: TableScan @@ -291,14 +291,14 @@ STAGE PLANS: outputColumnNames: ss_ext_list_price Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_ext_list_price) + aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 24 Map Operator Tree: TableScan @@ -312,14 +312,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_net_paid_inc_tax) + aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 26 Map Operator Tree: TableScan @@ -352,14 +352,14 @@ STAGE PLANS: outputColumnNames: ss_ext_list_price Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_ext_list_price) + aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 
120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 30 Map Operator Tree: TableScan @@ -373,14 +373,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_net_paid_inc_tax) + aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -394,14 +394,14 @@ STAGE PLANS: outputColumnNames: ss_ext_list_price Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_ext_list_price) + aggregations: sum(ss_ext_list_price), count(ss_ext_list_price) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -415,14 +415,14 @@ STAGE PLANS: outputColumnNames: ss_net_paid_inc_tax Statistics: Num rows: 63999515 Data size: 5646055787 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ss_net_paid_inc_tax) + aggregations: sum(ss_net_paid_inc_tax), count(ss_net_paid_inc_tax) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(17,2)), _col1 (type: bigint) Map 8 Map Operator Tree: TableScan @@ -447,27 +447,35 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 13 Local Work: Map Reduce Local Work Reduce Operator Tree: 
Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 15 Local Work: Map Reduce Local Work @@ -486,27 +494,35 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 19 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 21 Local Work: Map Reduce Local Work @@ -525,27 +541,35 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 25 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 27 Local Work: Map Reduce Local Work @@ 
-564,14 +588,18 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 3 Local Work: Map Reduce Local Work @@ -590,40 +618,52 @@ STAGE PLANS: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 5 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 - 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(37,22)) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 Reducer 9 Local Work: Map Reduce Local Work @@ -671,7 +711,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2 input vertices: 1 Reducer 5 - Statistics: Num rows: 36 Data size: 17928 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 11880 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -681,7 +721,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Reducer 7 - Statistics: Num rows: 36 Data size: 28332 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 16236 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -691,7 +731,7 @@ STAGE PLANS: 
outputColumnNames: _col1, _col2, _col3, _col4 input vertices: 1 Reducer 9 - Statistics: Num rows: 36 Data size: 28656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 16560 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -701,7 +741,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5 input vertices: 1 Reducer 11 - Statistics: Num rows: 36 Data size: 39060 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 20916 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -711,7 +751,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Reducer 13 - Statistics: Num rows: 36 Data size: 49464 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 25272 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -721,7 +761,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: 1 Reducer 15 - Statistics: Num rows: 36 Data size: 49788 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 25596 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -731,7 +771,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 input vertices: 1 Reducer 17 - Statistics: Num rows: 36 Data size: 60192 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 29952 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -741,7 +781,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 input vertices: 1 Reducer 19 - Statistics: Num rows: 36 Data size: 70596 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 34308 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -751,7 +791,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 input vertices: 1 Reducer 21 - Statistics: Num rows: 36 Data size: 70920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 34632 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -761,7 +801,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 input vertices: 1 Reducer 23 - Statistics: Num rows: 36 Data size: 81324 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 38988 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -771,7 +811,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 input vertices: 1 Reducer 25 - Statistics: Num rows: 36 Data size: 91728 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 43344 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -781,7 +821,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 input vertices: 1 Reducer 27 - Statistics: Num rows: 36 Data size: 92052 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 43668 Basic stats: 
COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -791,7 +831,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 input vertices: 1 Reducer 29 - Statistics: Num rows: 36 Data size: 102456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 48024 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -801,14 +841,14 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 input vertices: 1 Reducer 31 - Statistics: Num rows: 36 Data size: 112860 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 52380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: CASE WHEN ((_col1 > 409437L)) THEN (_col2) ELSE (_col3) END (type: decimal(11,6)), CASE WHEN ((_col4 > 4595804L)) THEN (_col5) ELSE (_col6) END (type: decimal(11,6)), CASE WHEN ((_col7 > 7887297L)) THEN (_col8) ELSE (_col9) END (type: decimal(11,6)), CASE WHEN ((_col10 > 10872978L)) THEN (_col11) ELSE (_col12) END (type: decimal(11,6)), CASE WHEN ((_col13 > 43571537L)) THEN (_col14) ELSE (_col15) END (type: decimal(11,6)) + expressions: CASE WHEN ((_col1 > 409437L)) THEN (_col2) ELSE (_col3) END (type: decimal(37,22)), CASE WHEN ((_col4 > 4595804L)) THEN (_col5) ELSE (_col6) END (type: decimal(37,22)), CASE WHEN ((_col7 > 7887297L)) THEN (_col8) ELSE (_col9) END (type: decimal(37,22)), CASE WHEN ((_col10 > 10872978L)) THEN (_col11) ELSE (_col12) END (type: decimal(37,22)), CASE WHEN ((_col13 > 43571537L)) THEN (_col14) ELSE (_col15) END (type: decimal(37,22)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 36 Data size: 112860 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 52380 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 112860 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 36 Data size: 52380 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/perf/spark/query92.q.out b/ql/src/test/results/clientpositive/perf/spark/query92.q.out index 1b73ab5878..e7b8632bea 100644 --- a/ql/src/test/results/clientpositive/perf/spark/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/spark/query92.q.out @@ -189,17 +189,17 @@ STAGE PLANS: 1 Map 9 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 158402938 Data size: 21538218500 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: decimal(17,2)), _col2 (type: bigint) Local Work: Map Reduce Local Work Reducer 2 @@ -263,13 +263,13 @@ STAGE PLANS: Reducer 7 Reduce Operator Tree: Group By Operator - aggregations: 
avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: (1.3 * _col1) (type: decimal(14,7)), _col0 (type: int) + expressions: (1.3 * (_col1 / _col2)) (type: decimal(38,21)), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -277,7 +277,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 79201469 Data size: 10769109250 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(14,7)) + value expressions: _col0 (type: decimal(38,21)) Reducer 8 Reduce Operator Tree: Join Operator @@ -293,7 +293,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 87121617 Data size: 11846020431 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(14,7)) + value expressions: _col0 (type: decimal(38,21)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tez/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/query1.q.out index ad350c1241..cbdd09ab8c 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query1.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query1.q.out @@ -142,7 +142,7 @@ Stage-0 Select Operator [SEL_38] (rows=15837566 width=77) Output:["_col0","_col1","_col2"] Group By Operator [GBY_37] (rows=15837566 width=77) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 Select Operator [SEL_33] (rows=31675133 width=77) Output:["_col1","_col2"] Group By Operator [GBY_32] (rows=31675133 width=77) diff --git a/ql/src/test/results/clientpositive/perf/tez/query13.q.out b/ql/src/test/results/clientpositive/perf/tez/query13.q.out index 7cb54498d4..065b231fa8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query13.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query13.q.out @@ -114,92 +114,94 @@ Stage-0 Stage-1 Reducer 7 File Output Operator [FS_40] - Group By Operator [GBY_38] (rows=1 width=764) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","sum(VALUE._col3)"] - <-Reducer 6 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_37] - Group By Operator [GBY_36] (rows=1 width=764) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col6)","avg(_col8)","avg(_col9)","sum(_col9)"] - Select Operator [SEL_35] (rows=715776 width=88) - Output:["_col6","_col8","_col9"] - Filter Operator [FIL_34] (rows=715776 width=88) - predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) - Merge Join Operator [MERGEJOIN_71] (rows=17178642 width=88) - Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=1861800 width=385) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_66] 
(rows=1861800 width=385) - predicate:(((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) - TableScan [TS_15] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_31] - PartitionCols:_col2 - Filter Operator [FIL_30] (rows=15616947 width=88) - predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) - Merge Join Operator [MERGEJOIN_70] (rows=93701693 width=88) - Conds:RS_27._col4=RS_28._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=10000000 width=1014) - Output:["_col0","_col1"] - Filter Operator [FIL_65] (rows=10000000 width=1014) - predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) - TableScan [TS_12] (rows=40000000 width=1014) - default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col4 - Merge Join Operator [MERGEJOIN_69] (rows=85183356 width=88) - Conds:RS_24._col3=RS_25._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=7200 width=107) - Output:["_col0","_col1"] - Filter Operator [FIL_64] (rows=7200 width=107) - predicate:(((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) - TableScan [TS_9] (rows=7200 width=107) - default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_68] (rows=77439413 width=88) - Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_63] (rows=36524 width=1119) - predicate:((d_year = 2001) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_67] (rows=70399465 width=88) - Conds:RS_18._col0=RS_19._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=1704 width=1910) - Output:["_col0"] - Filter Operator [FIL_61] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_0] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col4 - Select Operator [SEL_5] (rows=63999513 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] - Filter 
Operator [FIL_62] (rows=63999513 width=88) - predicate:((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) - TableScan [TS_3] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] + Select Operator [SEL_39] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_38] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_37] + Group By Operator [GBY_36] (rows=1 width=256) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(_col6)","count(_col6)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)"] + Select Operator [SEL_35] (rows=715776 width=88) + Output:["_col6","_col8","_col9"] + Filter Operator [FIL_34] (rows=715776 width=88) + predicate:(((_col19 = 'D') and (_col20 = 'Primary') and _col7 BETWEEN 50 AND 100 and (_col14 = 1)) or ((_col19 = 'M') and (_col20 = '4 yr Degree') and _col7 BETWEEN 100 AND 150 and (_col14 = 3)) or ((_col19 = 'U') and (_col20 = 'Advanced Degree') and _col7 BETWEEN 150 AND 200 and (_col14 = 1))) + Merge Join Operator [MERGEJOIN_71] (rows=17178642 width=88) + Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col6","_col7","_col8","_col9","_col14","_col19","_col20"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=1861800 width=385) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_66] (rows=1861800 width=385) + predicate:(((cd_education_status = '4 yr Degree') or (cd_education_status = 'Primary') or (cd_education_status = 'Advanced Degree')) and ((cd_marital_status = 'M') or (cd_marital_status = 'D') or (cd_marital_status = 'U')) and cd_demo_sk is not null) + TableScan [TS_15] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_marital_status","cd_education_status"] + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_31] + PartitionCols:_col2 + Filter Operator [FIL_30] (rows=15616947 width=88) + predicate:(((_col16) IN ('KY', 'GA', 'NM') and _col10 BETWEEN 100 AND 200) or ((_col16) IN ('MT', 'OR', 'IN') and _col10 BETWEEN 150 AND 300) or ((_col16) IN ('WI', 'MO', 'WV') and _col10 BETWEEN 50 AND 250)) + Merge Join Operator [MERGEJOIN_70] (rows=93701693 width=88) + Conds:RS_27._col4=RS_28._col0(Inner),Output:["_col2","_col6","_col7","_col8","_col9","_col10","_col14","_col16"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=10000000 width=1014) + Output:["_col0","_col1"] + Filter Operator [FIL_65] (rows=10000000 width=1014) + predicate:((ca_country = 'United States') and (ca_state) IN ('KY', 'GA', 'NM', 'MT', 'OR', 'IN', 'WI', 'MO', 'WV') and ca_address_sk is not null) + TableScan [TS_12] (rows=40000000 width=1014) + 
default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state","ca_country"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col4 + Merge Join Operator [MERGEJOIN_69] (rows=85183356 width=88) + Conds:RS_24._col3=RS_25._col0(Inner),Output:["_col2","_col4","_col6","_col7","_col8","_col9","_col10","_col14"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=7200 width=107) + Output:["_col0","_col1"] + Filter Operator [FIL_64] (rows=7200 width=107) + predicate:(((hd_dep_count = 3) or (hd_dep_count = 1)) and hd_demo_sk is not null) + TableScan [TS_9] (rows=7200 width=107) + default@household_demographics,household_demographics,Tbl:COMPLETE,Col:NONE,Output:["hd_demo_sk","hd_dep_count"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_68] (rows=77439413 width=88) + Conds:RS_21._col1=RS_22._col0(Inner),Output:["_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_63] (rows=36524 width=1119) + predicate:((d_year = 2001) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_67] (rows=70399465 width=88) + Conds:RS_18._col0=RS_19._col4(Inner),Output:["_col1","_col2","_col3","_col4","_col6","_col7","_col8","_col9","_col10"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=1704 width=1910) + Output:["_col0"] + Filter Operator [FIL_61] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_0] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col4 + Select Operator [SEL_5] (rows=63999513 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] + Filter Operator [FIL_62] (rows=63999513 width=88) + predicate:((ss_net_profit BETWEEN 100 AND 200 or ss_net_profit BETWEEN 150 AND 300 or ss_net_profit BETWEEN 50 AND 250) and (ss_sales_price BETWEEN 100 AND 150 or ss_sales_price BETWEEN 50 AND 100 or ss_sales_price BETWEEN 150 AND 200) and ss_addr_sk is not null and ss_cdemo_sk is not null and ss_hdemo_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null) + TableScan [TS_3] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_cdemo_sk","ss_hdemo_sk","ss_addr_sk","ss_store_sk","ss_quantity","ss_sales_price","ss_ext_sales_price","ss_ext_wholesale_cost","ss_net_profit"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query14.q.out b/ql/src/test/results/clientpositive/perf/tez/query14.q.out index 1cffcb010d..4461f020ea 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query14.q.out @@ -282,27 +282,27 @@ Stage-0 Stage-1 Reducer 8 File Output Operator [FS_574] - Limit [LIM_573] (rows=100 width=405) + Limit [LIM_573] (rows=100 width=237) Number of rows:100 - Select Operator [SEL_572] (rows=1016388080 width=405) + Select Operator [SEL_572] (rows=1016388080 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 7 [SIMPLE_EDGE] 
SHUFFLE [RS_571] - Select Operator [SEL_570] (rows=1016388080 width=405) + Select Operator [SEL_570] (rows=1016388080 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Group By Operator [GBY_569] (rows=1016388080 width=405) + Group By Operator [GBY_569] (rows=1016388080 width=237) Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Union 6 [SIMPLE_EDGE] <-Reducer 12 [CONTAINS] Reduce Output Operator [RS_568] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_567] (rows=2032776160 width=405) + Group By Operator [GBY_567] (rows=2032776160 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Select Operator [SEL_375] (rows=116155905 width=432) + Select Operator [SEL_375] (rows=116155905 width=264) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_374] (rows=116155905 width=432) + Filter Operator [FIL_374] (rows=116155905 width=264) predicate:(_col5 > _col1) - Merge Join Operator [MERGEJOIN_891] (rows=348467716 width=432) + Merge Join Operator [MERGEJOIN_891] (rows=348467716 width=264) Conds:(Inner),(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_370] @@ -401,78 +401,80 @@ Stage-0 default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_quantity"] <-Reducer 27 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_371] - Group By Operator [GBY_267] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Union 26 [CUSTOM_SIMPLE_EDGE] - <-Reducer 25 [CONTAINS] - Reduce Output Operator [RS_266] - Group By Operator [GBY_265] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_263] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_251] (rows=316788826 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_864] (rows=316788826 width=135) - Conds:RS_248._col0=RS_249._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] - SHUFFLE [RS_249] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_15] - <-Map 43 [SIMPLE_EDGE] - SHUFFLE [RS_248] - PartitionCols:_col0 - Select Operator [SEL_56] (rows=287989836 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_778] (rows=287989836 width=135) - predicate:cs_sold_date_sk is not null - TableScan [TS_54] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] - <-Reducer 37 [CONTAINS] - Reduce Output Operator [RS_266] - Group By Operator [GBY_265] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_263] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_262] (rows=158402938 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_865] (rows=158402938 width=135) - Conds:RS_259._col0=RS_260._col0(Inner),Output:["_col1","_col2"] - <-Map 35 [SIMPLE_EDGE] - SHUFFLE [RS_260] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_26] - <-Map 44 [SIMPLE_EDGE] - SHUFFLE [RS_259] - PartitionCols:_col0 - Select Operator [SEL_67] (rows=144002668 width=135) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_780] (rows=144002668 width=135) - predicate:ws_sold_date_sk is not 
null - TableScan [TS_65] (rows=144002668 width=135) - default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] - <-Reducer 41 [CONTAINS] - Reduce Output Operator [RS_266] - Group By Operator [GBY_265] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_263] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_241] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_863] (rows=633595212 width=88) - Conds:RS_238._col0=RS_239._col0(Inner),Output:["_col1","_col2"] - <-Map 86 [SIMPLE_EDGE] - SHUFFLE [RS_239] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_98] - <-Map 39 [SIMPLE_EDGE] - SHUFFLE [RS_238] - PartitionCols:_col0 - Select Operator [SEL_46] (rows=575995635 width=88) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_776] (rows=575995635 width=88) - predicate:ss_sold_date_sk is not null - TableScan [TS_44] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] + Select Operator [SEL_268] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_267] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 26 [CUSTOM_SIMPLE_EDGE] + <-Reducer 25 [CONTAINS] + Reduce Output Operator [RS_266] + Group By Operator [GBY_265] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_263] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_251] (rows=316788826 width=135) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_864] (rows=316788826 width=135) + Conds:RS_248._col0=RS_249._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_249] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_15] + <-Map 43 [SIMPLE_EDGE] + SHUFFLE [RS_248] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=287989836 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_778] (rows=287989836 width=135) + predicate:cs_sold_date_sk is not null + TableScan [TS_54] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_quantity","cs_list_price"] + <-Reducer 37 [CONTAINS] + Reduce Output Operator [RS_266] + Group By Operator [GBY_265] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_263] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_262] (rows=158402938 width=135) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_865] (rows=158402938 width=135) + Conds:RS_259._col0=RS_260._col0(Inner),Output:["_col1","_col2"] + <-Map 35 [SIMPLE_EDGE] + SHUFFLE [RS_260] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_26] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_259] + PartitionCols:_col0 + Select Operator [SEL_67] (rows=144002668 width=135) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_780] (rows=144002668 width=135) + predicate:ws_sold_date_sk is not null + TableScan [TS_65] (rows=144002668 width=135) + default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_quantity","ws_list_price"] + <-Reducer 41 [CONTAINS] + Reduce Output Operator [RS_266] + Group By Operator [GBY_265] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select 
Operator [SEL_263] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_241] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_863] (rows=633595212 width=88) + Conds:RS_238._col0=RS_239._col0(Inner),Output:["_col1","_col2"] + <-Map 86 [SIMPLE_EDGE] + SHUFFLE [RS_239] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_98] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_238] + PartitionCols:_col0 + Select Operator [SEL_46] (rows=575995635 width=88) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_776] (rows=575995635 width=88) + predicate:ss_sold_date_sk is not null + TableScan [TS_44] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_quantity","ss_list_price"] <-Reducer 52 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_372] Group By Operator [GBY_368] (rows=348467716 width=135) @@ -671,13 +673,13 @@ Stage-0 <-Reducer 16 [CONTAINS] Reduce Output Operator [RS_568] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_567] (rows=2032776160 width=405) + Group By Operator [GBY_567] (rows=2032776160 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, _col3, 0L - Select Operator [SEL_564] (rows=58081078 width=432) + Select Operator [SEL_564] (rows=58081078 width=264) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_563] (rows=58081078 width=432) + Filter Operator [FIL_563] (rows=58081078 width=264) predicate:(_col5 > _col1) - Merge Join Operator [MERGEJOIN_892] (rows=174243235 width=432) + Merge Join Operator [MERGEJOIN_892] (rows=174243235 width=264) Conds:(Inner),(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_559] @@ -746,63 +748,65 @@ Stage-0 Please refer to the previous Select Operator [SEL_23] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_560] - Group By Operator [GBY_456] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Union 29 [CUSTOM_SIMPLE_EDGE] - <-Reducer 28 [CONTAINS] - Reduce Output Operator [RS_455] - Group By Operator [GBY_454] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_452] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_440] (rows=316788826 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_879] (rows=316788826 width=135) - Conds:RS_437._col0=RS_438._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] - SHUFFLE [RS_438] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_15] - <-Map 43 [SIMPLE_EDGE] - SHUFFLE [RS_437] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_56] - <-Reducer 38 [CONTAINS] - Reduce Output Operator [RS_455] - Group By Operator [GBY_454] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_452] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_451] (rows=158402938 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_880] (rows=158402938 width=135) - Conds:RS_448._col0=RS_449._col0(Inner),Output:["_col1","_col2"] - <-Map 35 [SIMPLE_EDGE] - SHUFFLE [RS_449] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_26] - <-Map 44 [SIMPLE_EDGE] - SHUFFLE [RS_448] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_67] 
- <-Reducer 42 [CONTAINS] - Reduce Output Operator [RS_455] - Group By Operator [GBY_454] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_452] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_430] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_878] (rows=633595212 width=88) - Conds:RS_427._col0=RS_428._col0(Inner),Output:["_col1","_col2"] - <-Map 86 [SIMPLE_EDGE] - SHUFFLE [RS_428] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_98] - <-Map 39 [SIMPLE_EDGE] - SHUFFLE [RS_427] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_46] + Select Operator [SEL_457] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_456] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 29 [CUSTOM_SIMPLE_EDGE] + <-Reducer 28 [CONTAINS] + Reduce Output Operator [RS_455] + Group By Operator [GBY_454] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_452] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_440] (rows=316788826 width=135) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_879] (rows=316788826 width=135) + Conds:RS_437._col0=RS_438._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_438] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_15] + <-Map 43 [SIMPLE_EDGE] + SHUFFLE [RS_437] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_56] + <-Reducer 38 [CONTAINS] + Reduce Output Operator [RS_455] + Group By Operator [GBY_454] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_452] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_451] (rows=158402938 width=135) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_880] (rows=158402938 width=135) + Conds:RS_448._col0=RS_449._col0(Inner),Output:["_col1","_col2"] + <-Map 35 [SIMPLE_EDGE] + SHUFFLE [RS_449] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_26] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_448] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_67] + <-Reducer 42 [CONTAINS] + Reduce Output Operator [RS_455] + Group By Operator [GBY_454] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_452] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_430] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_878] (rows=633595212 width=88) + Conds:RS_427._col0=RS_428._col0(Inner),Output:["_col1","_col2"] + <-Map 86 [SIMPLE_EDGE] + SHUFFLE [RS_428] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_98] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_427] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_46] <-Reducer 55 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_561] Group By Operator [GBY_557] (rows=174243235 width=135) @@ -908,73 +912,75 @@ Stage-0 <-Reducer 5 [CONTAINS] Reduce Output Operator [RS_568] PartitionCols:_col0, _col1, _col2, _col3, _col4 - Group By Operator [GBY_567] (rows=2032776160 width=405) + Group By Operator [GBY_567] (rows=2032776160 width=237) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col4)","sum(_col5)"],keys:_col0, _col1, _col2, 
_col3, 0L - Select Operator [SEL_187] (rows=232318249 width=385) + Select Operator [SEL_187] (rows=232318249 width=217) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_186] (rows=232318249 width=385) + Filter Operator [FIL_186] (rows=232318249 width=217) predicate:(_col5 > _col1) - Merge Join Operator [MERGEJOIN_890] (rows=696954748 width=385) + Merge Join Operator [MERGEJOIN_890] (rows=696954748 width=217) Conds:(Inner),(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 24 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_183] - Group By Operator [GBY_79] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Union 23 [CUSTOM_SIMPLE_EDGE] - <-Reducer 22 [CONTAINS] - Reduce Output Operator [RS_78] - Group By Operator [GBY_77] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_75] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_63] (rows=316788826 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_849] (rows=316788826 width=135) - Conds:RS_60._col0=RS_61._col0(Inner),Output:["_col1","_col2"] - <-Map 21 [SIMPLE_EDGE] - SHUFFLE [RS_61] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_15] - <-Map 43 [SIMPLE_EDGE] - SHUFFLE [RS_60] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_56] - <-Reducer 36 [CONTAINS] - Reduce Output Operator [RS_78] - Group By Operator [GBY_77] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_75] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_74] (rows=158402938 width=135) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_850] (rows=158402938 width=135) - Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2"] - <-Map 35 [SIMPLE_EDGE] - SHUFFLE [RS_72] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_26] - <-Map 44 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_67] - <-Reducer 40 [CONTAINS] - Reduce Output Operator [RS_78] - Group By Operator [GBY_77] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(_col0)"] - Select Operator [SEL_75] (rows=1108786976 width=108) - Output:["_col0"] - Select Operator [SEL_53] (rows=633595212 width=88) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_848] (rows=633595212 width=88) - Conds:RS_50._col0=RS_51._col0(Inner),Output:["_col1","_col2"] - <-Map 86 [SIMPLE_EDGE] - SHUFFLE [RS_51] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_98] - <-Map 39 [SIMPLE_EDGE] - SHUFFLE [RS_50] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_46] + Select Operator [SEL_80] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_79] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Union 23 [CUSTOM_SIMPLE_EDGE] + <-Reducer 22 [CONTAINS] + Reduce Output Operator [RS_78] + Group By Operator [GBY_77] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_75] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_63] (rows=316788826 width=135) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_849] (rows=316788826 width=135) + Conds:RS_60._col0=RS_61._col0(Inner),Output:["_col1","_col2"] + <-Map 21 [SIMPLE_EDGE] + SHUFFLE [RS_61] + PartitionCols:_col0 + Please refer to the previous Select 
Operator [SEL_15] + <-Map 43 [SIMPLE_EDGE] + SHUFFLE [RS_60] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_56] + <-Reducer 36 [CONTAINS] + Reduce Output Operator [RS_78] + Group By Operator [GBY_77] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_75] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_74] (rows=158402938 width=135) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_850] (rows=158402938 width=135) + Conds:RS_71._col0=RS_72._col0(Inner),Output:["_col1","_col2"] + <-Map 35 [SIMPLE_EDGE] + SHUFFLE [RS_72] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_26] + <-Map 44 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_67] + <-Reducer 40 [CONTAINS] + Reduce Output Operator [RS_78] + Group By Operator [GBY_77] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(_col0)","count(_col0)"] + Select Operator [SEL_75] (rows=1108786976 width=108) + Output:["_col0"] + Select Operator [SEL_53] (rows=633595212 width=88) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_848] (rows=633595212 width=88) + Conds:RS_50._col0=RS_51._col0(Inner),Output:["_col1","_col2"] + <-Map 86 [SIMPLE_EDGE] + SHUFFLE [RS_51] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_98] + <-Map 39 [SIMPLE_EDGE] + SHUFFLE [RS_50] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_46] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_182] Select Operator [SEL_43] (rows=1 width=8) diff --git a/ql/src/test/results/clientpositive/perf/tez/query17.q.out b/ql/src/test/results/clientpositive/perf/tez/query17.q.out index 7c195c2f1d..5e68fe3c97 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query17.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query17.q.out @@ -104,122 +104,124 @@ Stage-0 limit:100 Stage-1 Reducer 7 - File Output Operator [FS_55] - Limit [LIM_54] (rows=100 width=88) + File Output Operator [FS_56] + Limit [LIM_55] (rows=100 width=88) Number of rows:100 - Select Operator [SEL_53] (rows=421657640 width=88) + Select Operator [SEL_54] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 6 [SIMPLE_EDGE] - SHUFFLE [RS_52] - Select Operator [SEL_51] (rows=421657640 width=88) + SHUFFLE [RS_53] + Select Operator [SEL_52] (rows=421657640 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] - Group By Operator [GBY_50] (rows=421657640 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["count(VALUE._col0)","avg(VALUE._col1)","stddev_samp(VALUE._col2)","count(VALUE._col3)","avg(VALUE._col4)","stddev_samp(VALUE._col5)","count(VALUE._col6)","avg(VALUE._col7)","stddev_samp(VALUE._col8)"],keys:KEY._col0, KEY._col1, KEY._col2 + Group By Operator [GBY_51] (rows=421657640 width=88) + 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","count(VALUE._col4)","sum(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","sum(VALUE._col9)","sum(VALUE._col10)","sum(VALUE._col11)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_49] + SHUFFLE [RS_50] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_48] (rows=843315281 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["count(_col5)","avg(_col5)","stddev_samp(_col5)","count(_col21)","avg(_col21)","stddev_samp(_col21)","count(_col14)","avg(_col14)","stddev_samp(_col14)"],keys:_col9, _col10, _col25 - Merge Join Operator [MERGEJOIN_98] (rows=843315281 width=88) - Conds:RS_44._col3=RS_45._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_45] - PartitionCols:_col0 - Select Operator [SEL_34] (rows=1704 width=1910) - Output:["_col0","_col1"] - Filter Operator [FIL_91] (rows=1704 width=1910) - predicate:s_store_sk is not null - TableScan [TS_32] (rows=1704 width=1910) - default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_97] (rows=766650239 width=88) - Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, _col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] - <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col7, _col8, _col9 - Merge Join Operator [MERGEJOIN_96] (rows=348467716 width=135) - Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col1, _col2 - Merge Join Operator [MERGEJOIN_95] (rows=63350266 width=77) - Conds:RS_21._col0=RS_22._col0(Inner),Output:["_col1","_col2","_col3","_col4"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_90] (rows=36525 width=1119) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=57591150 width=77) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_89] (rows=57591150 width=77) - predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) - TableScan [TS_15] (rows=57591150 width=77) - default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_28] - PartitionCols:_col2, _col1 - Merge Join Operator [MERGEJOIN_94] (rows=316788826 width=135) - Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=36525 width=1119) - Output:["_col0"] - Filter Operator [FIL_88] (rows=36525 width=1119) - predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is 
not null) - Please refer to the previous TableScan [TS_3] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_87] (rows=287989836 width=135) - predicate:(cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_9] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col1, _col2, _col4 - Merge Join Operator [MERGEJOIN_93] (rows=696954748 width=88) - Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_39] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=462000 width=1436) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_86] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_38] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_92] (rows=633595212 width=88) - Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_85] (rows=36524 width=1119) - predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_35] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"] - Filter Operator [FIL_84] (rows=575995635 width=88) - predicate:(ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] + Group By Operator [GBY_49] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"],aggregations:["count(_col3)","sum(_col3)","sum(_col7)","sum(_col6)","count(_col4)","sum(_col4)","sum(_col9)","sum(_col8)","count(_col5)","sum(_col5)","sum(_col11)","sum(_col10)"],keys:_col0, _col1, _col2 + Select Operator [SEL_47] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] + Merge Join Operator [MERGEJOIN_99] (rows=843315281 width=88) + Conds:RS_44._col3=RS_45._col0(Inner),Output:["_col5","_col9","_col10","_col14","_col21","_col25"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_45] + PartitionCols:_col0 + Select Operator [SEL_34] (rows=1704 width=1910) + Output:["_col0","_col1"] + Filter Operator [FIL_92] (rows=1704 width=1910) + predicate:s_store_sk is not null + TableScan [TS_32] (rows=1704 width=1910) + default@store,store,Tbl:COMPLETE,Col:NONE,Output:["s_store_sk","s_state"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_98] (rows=766650239 width=88) + Conds:RS_41._col1, _col2, _col4=RS_42._col7, _col8, 
_col9(Inner),Output:["_col3","_col5","_col9","_col10","_col14","_col21"] + <-Reducer 10 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col7, _col8, _col9 + Merge Join Operator [MERGEJOIN_97] (rows=348467716 width=135) + Conds:RS_28._col2, _col1=RS_29._col1, _col2(Inner),Output:["_col3","_col7","_col8","_col9","_col10"] + <-Reducer 11 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col1, _col2 + Merge Join Operator [MERGEJOIN_96] (rows=63350266 width=77) + Conds:RS_21._col0=RS_22._col0(Inner),Output:["_col1","_col2","_col3","_col4"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_91] (rows=36525 width=1119) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_quarter_name"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=57591150 width=77) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_90] (rows=57591150 width=77) + predicate:(sr_customer_sk is not null and sr_item_sk is not null and sr_returned_date_sk is not null and sr_ticket_number is not null) + TableScan [TS_15] (rows=57591150 width=77) + default@store_returns,store_returns,Tbl:COMPLETE,Col:NONE,Output:["sr_returned_date_sk","sr_item_sk","sr_customer_sk","sr_ticket_number","sr_return_quantity"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_28] + PartitionCols:_col2, _col1 + Merge Join Operator [MERGEJOIN_95] (rows=316788826 width=135) + Conds:RS_25._col0=RS_26._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=36525 width=1119) + Output:["_col0"] + Filter Operator [FIL_89] (rows=36525 width=1119) + predicate:((d_quarter_name) IN ('2000Q1', '2000Q2', '2000Q3') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_88] (rows=287989836 width=135) + predicate:(cs_bill_customer_sk is not null and cs_item_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_9] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk","cs_quantity"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col1, _col2, _col4 + Merge Join Operator [MERGEJOIN_94] (rows=696954748 width=88) + Conds:RS_38._col1=RS_39._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col9","_col10"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_39] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=462000 width=1436) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_87] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id","i_item_desc"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_38] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_93] (rows=633595212 width=88) + Conds:RS_35._col0=RS_36._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_36] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_86] (rows=36524 
width=1119) + predicate:((d_quarter_name = '2000Q1') and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_35] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Filter Operator [FIL_85] (rows=575995635 width=88) + predicate:(ss_customer_sk is not null and ss_item_sk is not null and ss_sold_date_sk is not null and ss_store_sk is not null and ss_ticket_number is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk","ss_store_sk","ss_ticket_number","ss_quantity"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query18.q.out b/ql/src/test/results/clientpositive/perf/tez/query18.q.out index adb9682fc2..c37fc9f53e 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query18.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query18.q.out @@ -91,12 +91,12 @@ Stage-0 Select Operator [SEL_45] (rows=1054114882 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] Group By Operator [GBY_44] (rows=1054114882 width=135) - Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)","avg(VALUE._col4)","avg(VALUE._col5)","avg(VALUE._col6)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + Output:["_col0","_col1","_col2","_col3","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)","sum(VALUE._col8)","count(VALUE._col9)","sum(VALUE._col10)","count(VALUE._col11)","sum(VALUE._col12)","count(VALUE._col13)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_43] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_42] (rows=2108229765 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],aggregations:["avg(_col4)","avg(_col5)","avg(_col6)","avg(_col7)","avg(_col8)","avg(_col9)","avg(_col10)"],keys:_col0, _col1, _col2, _col3, 0L + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col6)","count(_col6)","sum(_col7)","count(_col7)","sum(_col8)","count(_col8)","sum(_col9)","count(_col9)","sum(_col10)","count(_col10)"],keys:_col0, _col1, _col2, _col3, 0L Select Operator [SEL_40] (rows=421645953 width=135) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] Merge Join Operator [MERGEJOIN_83] (rows=421645953 width=135) diff --git a/ql/src/test/results/clientpositive/perf/tez/query22.q.out b/ql/src/test/results/clientpositive/perf/tez/query22.q.out index efc87ede40..9a04651daf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query22.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query22.q.out @@ -54,58 +54,58 @@ Stage-0 limit:100 Stage-1 Reducer 6 - File Output Operator [FS_29] - Limit [LIM_28] (rows=100 width=15) + File Output Operator 
[FS_30] + Limit [LIM_29] (rows=100 width=15) Number of rows:100 - Select Operator [SEL_27] (rows=125060762 width=15) + Select Operator [SEL_28] (rows=125060762 width=15) Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_26] + SHUFFLE [RS_27] Select Operator [SEL_25] (rows=125060762 width=15) Output:["_col0","_col1","_col2","_col3","_col4"] Group By Operator [GBY_24] (rows=125060762 width=15) - Output:["_col0","_col1","_col2","_col3","_col5"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 + Output:["_col0","_col1","_col2","_col3","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0, _col1, _col2, _col3, _col4 Group By Operator [GBY_22] (rows=250121525 width=15) - Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["avg(_col3)"],keys:_col8, _col9, _col10, _col11, 0L - Merge Join Operator [MERGEJOIN_45] (rows=50024305 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)"],keys:_col8, _col9, _col10, _col11, 0L + Merge Join Operator [MERGEJOIN_46] (rows=50024305 width=15) Conds:RS_18._col1=RS_19._col0(Inner),Output:["_col3","_col8","_col9","_col10","_col11"] <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 Select Operator [SEL_11] (rows=462000 width=1436) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_42] (rows=462000 width=1436) + Filter Operator [FIL_43] (rows=462000 width=1436) predicate:i_item_sk is not null TableScan [TS_9] (rows=462000 width=1436) default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_brand","i_class","i_category","i_product_name"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_44] (rows=45476640 width=15) + Merge Join Operator [MERGEJOIN_45] (rows=45476640 width=15) Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col1","_col3"] <-Map 8 [SIMPLE_EDGE] SHUFFLE [RS_16] PartitionCols:_col0 Select Operator [SEL_8] (rows=27 width=1029) Output:["_col0"] - Filter Operator [FIL_41] (rows=27 width=1029) + Filter Operator [FIL_42] (rows=27 width=1029) predicate:w_warehouse_sk is not null TableScan [TS_6] (rows=27 width=1029) default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_43] (rows=41342400 width=15) + Merge Join Operator [MERGEJOIN_44] (rows=41342400 width=15) Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col0 Select Operator [SEL_2] (rows=37584000 width=15) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_39] (rows=37584000 width=15) + Filter Operator [FIL_40] (rows=37584000 width=15) predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) TableScan [TS_0] (rows=37584000 width=15) default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] @@ -114,7 +114,7 @@ Stage-0 PartitionCols:_col0 Select Operator [SEL_5] (rows=8116 width=1119) Output:["_col0"] - Filter Operator [FIL_40] (rows=8116 width=1119) + Filter Operator [FIL_41] (rows=8116 width=1119) predicate:(d_date_sk is not null and d_month_seq BETWEEN 1212 AND 1223) TableScan [TS_3] (rows=73049 width=1119) 
default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query24.q.out b/ql/src/test/results/clientpositive/perf/tez/query24.q.out index 5cbbea335f..42e6a37dba 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query24.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query24.q.out @@ -121,22 +121,22 @@ Stage-0 Stage-1 Reducer 8 File Output Operator [FS_91] - Select Operator [SEL_90] (rows=77303902 width=489) + Select Operator [SEL_90] (rows=77303902 width=321) Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_89] (rows=77303902 width=489) + Filter Operator [FIL_89] (rows=77303902 width=321) predicate:(_col3 > _col4) - Merge Join Operator [MERGEJOIN_154] (rows=231911707 width=489) + Merge Join Operator [MERGEJOIN_154] (rows=231911707 width=321) Conds:(Inner),Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_87] - Select Operator [SEL_85] (rows=1 width=400) + Select Operator [SEL_85] (rows=1 width=232) Output:["_col0"] - Group By Operator [GBY_84] (rows=1 width=400) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] + Group By Operator [GBY_84] (rows=1 width=232) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_83] - Group By Operator [GBY_82] (rows=1 width=400) - Output:["_col0"],aggregations:["avg(_col10)"] + Group By Operator [GBY_82] (rows=1 width=232) + Output:["_col0","_col1"],aggregations:["sum(_col10)","count(_col10)"] Select Operator [SEL_80] (rows=463823414 width=88) Output:["_col10"] Group By Operator [GBY_79] (rows=463823414 width=88) diff --git a/ql/src/test/results/clientpositive/perf/tez/query26.q.out b/ql/src/test/results/clientpositive/perf/tez/query26.q.out index 8990298822..45b70acacf 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query26.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query26.q.out @@ -60,73 +60,75 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_32] - Group By Operator [GBY_30] (rows=210822976 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_28] (rows=421645953 width=135) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["avg(_col4)","avg(_col5)","avg(_col7)","avg(_col6)"],keys:_col18 - Merge Join Operator [MERGEJOIN_58] (rows=421645953 width=135) - Conds:RS_24._col2=RS_25._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=462000 width=1436) - Output:["_col0","_col1"] - Filter Operator [FIL_54] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_57] (rows=383314495 width=135) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=2300 width=1179) - Output:["_col0"] - Filter Operator [FIL_53] (rows=2300 width=1179) - predicate:(((p_channel_email 
= 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) - TableScan [TS_9] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_email","p_channel_event"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_56] (rows=348467716 width=135) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_52] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_55] (rows=316788826 width=135) - Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Select Operator [SEL_2] (rows=287989836 width=135) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter Operator [FIL_50] (rows=287989836 width=135) - predicate:(cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) - TableScan [TS_0] (rows=287989836 width=135) - default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=232725 width=385) - Output:["_col0"] - Filter Operator [FIL_51] (rows=232725 width=385) - predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + Select Operator [SEL_31] (rows=210822976 width=135) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_30] (rows=210822976 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=421645953 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col18 + Merge Join Operator [MERGEJOIN_58] (rows=421645953 width=135) + Conds:RS_24._col2=RS_25._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_54] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_12] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_57] 
(rows=383314495 width=135) + Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=2300 width=1179) + Output:["_col0"] + Filter Operator [FIL_53] (rows=2300 width=1179) + predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) + TableScan [TS_9] (rows=2300 width=1179) + default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_email","p_channel_event"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_56] (rows=348467716 width=135) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_52] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_55] (rows=316788826 width=135) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col0","_col2","_col3","_col4","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Select Operator [SEL_2] (rows=287989836 width=135) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_50] (rows=287989836 width=135) + predicate:(cs_bill_cdemo_sk is not null and cs_item_sk is not null and cs_promo_sk is not null and cs_sold_date_sk is not null) + TableScan [TS_0] (rows=287989836 width=135) + default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_cdemo_sk","cs_item_sk","cs_promo_sk","cs_quantity","cs_list_price","cs_sales_price","cs_coupon_amt"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=232725 width=385) + Output:["_col0"] + Filter Operator [FIL_51] (rows=232725 width=385) + predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) + TableScan [TS_3] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query27.q.out b/ql/src/test/results/clientpositive/perf/tez/query27.q.out index adb1bb7f1e..b16e740ed8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query27.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query27.q.out @@ -67,12 +67,12 @@ Stage-0 Select Operator [SEL_32] (rows=1264972921 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Group By Operator [GBY_31] (rows=1264972921 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_30] 
PartitionCols:_col0, _col1, _col2 Group By Operator [GBY_29] (rows=2529945843 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["avg(_col2)","avg(_col3)","avg(_col4)","avg(_col5)"],keys:_col0, _col1, 0L + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"],aggregations:["sum(_col2)","count(_col2)","sum(_col3)","count(_col3)","sum(_col4)","count(_col4)","sum(_col5)","count(_col5)"],keys:_col0, _col1, 0L Select Operator [SEL_27] (rows=843315281 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Merge Join Operator [MERGEJOIN_59] (rows=843315281 width=88) diff --git a/ql/src/test/results/clientpositive/perf/tez/query28.q.out b/ql/src/test/results/clientpositive/perf/tez/query28.q.out index 33dc1ae513..086ac492ac 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query28.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query28.q.out @@ -126,131 +126,143 @@ Stage-0 Stage-1 Reducer 4 File Output Operator [FS_51] - Limit [LIM_50] (rows=1 width=3505) + Limit [LIM_50] (rows=1 width=1393) Number of rows:100 - Select Operator [SEL_49] (rows=1 width=3505) + Select Operator [SEL_49] (rows=1 width=1393) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] - Merge Join Operator [MERGEJOIN_94] (rows=1 width=3505) + Merge Join Operator [MERGEJOIN_94] (rows=1 width=1393) Conds:(Inner),(Inner),(Inner),(Inner),(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"] <-Reducer 10 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_45] - Group By Operator [GBY_81] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 9 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_80] - Group By Operator [GBY_79] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_78] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_77] - PartitionCols:_col0 - Group By Operator [GBY_76] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_23] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_55] (rows=21333171 width=88) - predicate:((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] + Select Operator [SEL_27] (rows=1 width=232) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_81] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_80] + Group By Operator [GBY_79] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_78] (rows=21333171 width=88) + 
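The pattern in the hunks above, and throughout the remaining golden files, is the one rewrite this rule performs on avg: the single `avg(x)` call becomes the mergeable pair `sum(x)`/`count(x)`, the pair is carried through the partial (map-side) and merge (reduce-side) Group By Operators, and a final Select Operator divides the two back into an average. A minimal plain-Java sketch of that shape — not Hive code, all names are illustrative:

```java
import java.util.ArrayList;
import java.util.List;

/** Sketch: avg(x) decomposed into (sum(x), count(x)) partials that are
 *  merged associatively and divided only at the end -- the same shape as
 *  the rewritten GBY/Select operators in the plans above. */
public class AvgDecomposition {

  /** Partial aggregate state: what each map-side GBY emits. */
  static final class SumCount {
    double sum;   // sum(x)
    long count;   // count(x); nulls are skipped, as in SQL

    void add(Double x) {
      if (x == null) {
        return;                       // avg/count ignore nulls
      }
      sum += x;
      count++;
    }

    /** Reduce-side merge: sum(VALUE._col0), count(VALUE._col1). */
    void merge(SumCount other) {
      sum += other.sum;
      count += other.count;
    }

    /** Final Select Operator: sum / count, null on an empty group. */
    Double finish() {
      return count == 0 ? null : sum / count;
    }
  }

  public static void main(String[] args) {
    // Two "map task" splits each produce a partial (sum, count) pair.
    List<Double> split1 = List.of(1.0, 2.0, 3.0);
    List<Double> split2 = new ArrayList<>(List.of(4.0));
    split2.add(null);                 // a null must not affect the average

    SumCount p1 = new SumCount();
    split1.forEach(p1::add);
    SumCount p2 = new SumCount();
    split2.forEach(p2::add);

    p1.merge(p2);                     // the reducer merges the partials
    System.out.println(p1.finish());  // 2.5, i.e. avg(1, 2, 3, 4)
  }
}
```

The point of the decomposition is that sum and count are associative and therefore safe to compute in partial aggregations and merge later, while avg itself is not directly mergeable; this is also why every rewritten Group By Operator in these plans grows extra `_colN` outputs.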
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_77] + PartitionCols:_col0 + Group By Operator [GBY_76] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_23] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_55] (rows=21333171 width=88) + predicate:((ss_list_price BETWEEN 142 AND 152 or ss_coupon_amt BETWEEN 3054 AND 4054 or ss_wholesale_cost BETWEEN 80 AND 100) and ss_quantity BETWEEN 16 AND 20) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_wholesale_cost","ss_list_price","ss_coupon_amt"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_46] - Group By Operator [GBY_87] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 11 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_86] - Group By Operator [GBY_85] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_84] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_83] - PartitionCols:_col0 - Group By Operator [GBY_82] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_30] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_56] (rows=21333171 width=88) - predicate:((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_34] (rows=1 width=232) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_87] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 11 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_86] + Group By Operator [GBY_85] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_84] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_83] + PartitionCols:_col0 + Group By Operator [GBY_82] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_30] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_56] (rows=21333171 width=88) + predicate:((ss_list_price BETWEEN 66 AND 76 or ss_coupon_amt BETWEEN 920 AND 1920 or ss_wholesale_cost BETWEEN 4 AND 24) and ss_quantity BETWEEN 11 AND 15) + Please refer to the previous TableScan [TS_0] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_47] - Group By Operator [GBY_93] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 13 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_92] - Group By Operator [GBY_91] (rows=1 width=584) - 
Output:["_col0","_col1","_col2"],aggregations:["avg(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_90] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_89] - PartitionCols:_col0 - Group By Operator [GBY_88] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_37] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_57] (rows=21333171 width=88) - predicate:((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_41] (rows=1 width=232) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_93] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 13 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_92] + Group By Operator [GBY_91] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_90] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_89] + PartitionCols:_col0 + Group By Operator [GBY_88] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_37] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_57] (rows=21333171 width=88) + predicate:((ss_list_price BETWEEN 91 AND 101 or ss_coupon_amt BETWEEN 1430 AND 2430 or ss_wholesale_cost BETWEEN 32 AND 52) and ss_quantity BETWEEN 6 AND 10) + Please refer to the previous TableScan [TS_0] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_42] - Group By Operator [GBY_63] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 2 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_62] - Group By Operator [GBY_61] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_60] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_59] - PartitionCols:_col0 - Group By Operator [GBY_58] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_2] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_52] (rows=21333171 width=88) - predicate:((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_6] (rows=1 width=232) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_63] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 2 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_62] + Group By Operator [GBY_61] (rows=1 width=232) + 
Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_60] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_59] + PartitionCols:_col0 + Group By Operator [GBY_58] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_2] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_52] (rows=21333171 width=88) + predicate:((ss_list_price BETWEEN 11 AND 21 or ss_coupon_amt BETWEEN 460 AND 1460 or ss_wholesale_cost BETWEEN 14 AND 34) and ss_quantity BETWEEN 0 AND 5) + Please refer to the previous TableScan [TS_0] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_43] - Group By Operator [GBY_69] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 5 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_68] - Group By Operator [GBY_67] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_66] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_65] - PartitionCols:_col0 - Group By Operator [GBY_64] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_9] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_53] (rows=21333171 width=88) - predicate:((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_13] (rows=1 width=232) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_69] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_68] + Group By Operator [GBY_67] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_66] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_65] + PartitionCols:_col0 + Group By Operator [GBY_64] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_9] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_53] (rows=21333171 width=88) + predicate:((ss_list_price BETWEEN 28 AND 38 or ss_coupon_amt BETWEEN 2513 AND 3513 or ss_wholesale_cost BETWEEN 42 AND 62) and ss_quantity BETWEEN 26 AND 30) + Please refer to the previous TableScan [TS_0] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_44] - Group By Operator [GBY_75] (rows=1 width=584) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] - <-Reducer 7 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_74] - Group By Operator [GBY_73] (rows=1 width=584) - 
Output:["_col0","_col1","_col2"],aggregations:["avg(_col1)","count(_col2)","count(_col0)"] - Group By Operator [GBY_72] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Group By Operator [GBY_70] (rows=21333171 width=88) - Output:["_col0","_col1","_col2"],aggregations:["avg(ss_list_price)","count(ss_list_price)"],keys:ss_list_price - Select Operator [SEL_16] (rows=21333171 width=88) - Output:["ss_list_price"] - Filter Operator [FIL_54] (rows=21333171 width=88) - predicate:((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) - Please refer to the previous TableScan [TS_0] + Select Operator [SEL_20] (rows=1 width=232) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_75] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + <-Reducer 7 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_74] + Group By Operator [GBY_73] (rows=1 width=232) + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col2)","count(_col0)"] + Group By Operator [GBY_72] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Group By Operator [GBY_70] (rows=21333171 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_list_price)","count(ss_list_price)"],keys:ss_list_price + Select Operator [SEL_16] (rows=21333171 width=88) + Output:["ss_list_price"] + Filter Operator [FIL_54] (rows=21333171 width=88) + predicate:((ss_list_price BETWEEN 135 AND 145 or ss_coupon_amt BETWEEN 14180 AND 15180 or ss_wholesale_cost BETWEEN 38 AND 58) and ss_quantity BETWEEN 21 AND 25) + Please refer to the previous TableScan [TS_0] diff --git a/ql/src/test/results/clientpositive/perf/tez/query30.q.out b/ql/src/test/results/clientpositive/perf/tez/query30.q.out index 93ce9cd7eb..7b6822e317 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query30.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query30.q.out @@ -126,7 +126,7 @@ Stage-0 Select Operator [SEL_50] (rows=11000000 width=1014) Output:["_col0","_col1","_col2"] Group By Operator [GBY_49] (rows=11000000 width=1014) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 Select Operator [SEL_45] (rows=22000000 width=1014) Output:["_col0","_col2"] Group By Operator [GBY_44] (rows=22000000 width=1014) diff --git a/ql/src/test/results/clientpositive/perf/tez/query32.q.out b/ql/src/test/results/clientpositive/perf/tez/query32.q.out index adb4377fb6..1cf1b145a8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query32.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query32.q.out @@ -124,11 +124,11 @@ Stage-0 Select Operator [SEL_19] (rows=158394413 width=135) Output:["_col0","_col1"] Group By Operator [GBY_18] (rows=158394413 width=135) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=316788826 width=135) - 
Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 Please refer to the previous Merge Join Operator [MERGEJOIN_58] diff --git a/ql/src/test/results/clientpositive/perf/tez/query35.q.out b/ql/src/test/results/clientpositive/perf/tez/query35.q.out index decbadbdcd..9df9fe8295 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query35.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query35.q.out @@ -139,12 +139,12 @@ Stage-0 Select Operator [SEL_65] (rows=1045432122 width=88) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col9","_col10","_col11","_col12","_col14","_col15","_col16","_col17"] Group By Operator [GBY_64] (rows=1045432122 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","avg(VALUE._col1)","max(VALUE._col2)","sum(VALUE._col3)","avg(VALUE._col4)","max(VALUE._col5)","sum(VALUE._col6)","avg(VALUE._col7)","max(VALUE._col8)","sum(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","max(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","max(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)","max(VALUE._col9)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5 <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_63] PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5 Group By Operator [GBY_62] (rows=2090864244 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","avg(_col8)","max(_col8)","sum(_col8)","avg(_col9)","max(_col9)","sum(_col9)","avg(_col10)","max(_col10)","sum(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"],aggregations:["count()","sum(_col8)","count(_col8)","max(_col8)","sum(_col9)","count(_col9)","max(_col9)","sum(_col10)","count(_col10)","max(_col10)"],keys:_col4, _col6, _col7, _col8, _col9, _col10 Select Operator [SEL_61] (rows=2090864244 width=88) Output:["_col4","_col6","_col7","_col8","_col9","_col10"] Filter Operator [FIL_60] (rows=2090864244 width=88) diff --git a/ql/src/test/results/clientpositive/perf/tez/query39.q.out b/ql/src/test/results/clientpositive/perf/tez/query39.q.out index fdcd651351..bbb21b05ee 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query39.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query39.q.out @@ -74,7 +74,7 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_60] - Merge Join Operator [MERGEJOIN_104] (rows=13756683 width=15) + Merge Join Operator [MERGEJOIN_106] (rows=13756683 width=15) Conds:RS_56._col0, _col1=RS_57._col0, _col1(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 11 [SIMPLE_EDGE] SHUFFLE [RS_57] @@ -82,112 +82,116 @@ Stage-0 Select Operator [SEL_55] (rows=12506076 width=15) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_54] (rows=12506076 width=15) - 
predicate:CASE WHEN ((_col4 = 0.0D)) THEN (false) ELSE (((_col3 / _col4) > 1.0D)) END - Select Operator [SEL_53] (rows=25012152 width=15) - Output:["_col1","_col2","_col3","_col4"] - Group By Operator [GBY_52] (rows=25012152 width=15) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["stddev_samp(VALUE._col0)","avg(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + predicate:CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END + Select Operator [SEL_99] (rows=25012152 width=15) + Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_53] (rows=25012152 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 10 [SIMPLE_EDGE] - SHUFFLE [RS_51] + SHUFFLE [RS_52] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_50] (rows=50024305 width=15) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["stddev_samp(_col3)","avg(_col3)"],keys:_col8, _col7, _col9 - Merge Join Operator [MERGEJOIN_103] (rows=50024305 width=15) - Conds:RS_46._col2=RS_47._col0(Inner),Output:["_col3","_col7","_col8","_col9"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_47] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=27 width=1029) - Output:["_col0","_col1"] - Filter Operator [FIL_93] (rows=27 width=1029) - predicate:w_warehouse_sk is not null - TableScan [TS_9] (rows=27 width=1029) - default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_46] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_102] (rows=45476640 width=15) - Conds:RS_43._col1=RS_44._col0(Inner),Output:["_col2","_col3","_col7"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_44] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=462000 width=1436) - Output:["_col0"] - Filter Operator [FIL_92] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_6] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_101] (rows=41342400 width=15) - Conds:RS_40._col0=RS_41._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=37584000 width=15) - Output:["_col0","_col1","_col2","_col3"] - Filter Operator [FIL_90] (rows=37584000 width=15) - predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) - TableScan [TS_0] (rows=37584000 width=15) - default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_41] - PartitionCols:_col0 - Select Operator [SEL_33] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_95] (rows=18262 width=1119) - predicate:((d_moy = 5) and (d_year = 1999) and d_date_sk is not null) - TableScan [TS_3] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] + Group By Operator [GBY_51] (rows=50024305 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, 
_col2, _col0 + Select Operator [SEL_49] (rows=50024305 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_105] (rows=50024305 width=15) + Conds:RS_46._col2=RS_47._col0(Inner),Output:["_col3","_col7","_col8","_col9"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_47] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=27 width=1029) + Output:["_col0","_col1"] + Filter Operator [FIL_93] (rows=27 width=1029) + predicate:w_warehouse_sk is not null + TableScan [TS_9] (rows=27 width=1029) + default@warehouse,warehouse,Tbl:COMPLETE,Col:NONE,Output:["w_warehouse_sk","w_warehouse_name"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_46] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_104] (rows=45476640 width=15) + Conds:RS_43._col1=RS_44._col0(Inner),Output:["_col2","_col3","_col7"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_44] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=462000 width=1436) + Output:["_col0"] + Filter Operator [FIL_92] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_6] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_103] (rows=41342400 width=15) + Conds:RS_40._col0=RS_41._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=37584000 width=15) + Output:["_col0","_col1","_col2","_col3"] + Filter Operator [FIL_90] (rows=37584000 width=15) + predicate:(inv_date_sk is not null and inv_item_sk is not null and inv_warehouse_sk is not null) + TableScan [TS_0] (rows=37584000 width=15) + default@inventory,inventory,Tbl:COMPLETE,Col:NONE,Output:["inv_date_sk","inv_item_sk","inv_warehouse_sk","inv_quantity_on_hand"] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_41] + PartitionCols:_col0 + Select Operator [SEL_33] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_95] (rows=18262 width=1119) + predicate:((d_moy = 5) and (d_year = 1999) and d_date_sk is not null) + TableScan [TS_3] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year","d_moy"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_56] PartitionCols:_col0, _col1 Select Operator [SEL_27] (rows=12506076 width=15) Output:["_col0","_col1","_col2","_col3"] Filter Operator [FIL_26] (rows=12506076 width=15) - predicate:CASE WHEN ((_col4 = 0.0D)) THEN (false) ELSE (((_col3 / _col4) > 1.0D)) END - Select Operator [SEL_25] (rows=25012152 width=15) - Output:["_col1","_col2","_col3","_col4"] - Group By Operator [GBY_24] (rows=25012152 width=15) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["stddev_samp(VALUE._col0)","avg(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2 + predicate:CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END + Select Operator [SEL_98] (rows=25012152 width=15) + Output:["_col0","_col1","_col3","_col4","_col5","_col6"] + Group By Operator [GBY_25] (rows=25012152 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_23] + SHUFFLE [RS_24] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_22] (rows=50024305 
width=15) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["stddev_samp(_col3)","avg(_col3)"],keys:_col8, _col7, _col9 - Merge Join Operator [MERGEJOIN_100] (rows=50024305 width=15) - Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col7","_col8","_col9"] - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_11] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_99] (rows=45476640 width=15) - Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col7"] - <-Map 13 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_8] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_98] (rows=41342400 width=15) - Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_12] - PartitionCols:_col0 - Please refer to the previous Select Operator [SEL_2] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_13] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=18262 width=1119) - Output:["_col0"] - Filter Operator [FIL_91] (rows=18262 width=1119) - predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) - Please refer to the previous TableScan [TS_3] + Group By Operator [GBY_23] (rows=50024305 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col3)","count(_col3)","sum(_col5)","sum(_col4)"],keys:_col1, _col2, _col0 + Select Operator [SEL_21] (rows=50024305 width=15) + Output:["_col0","_col1","_col2","_col3","_col4","_col5"] + Merge Join Operator [MERGEJOIN_102] (rows=50024305 width=15) + Conds:RS_18._col2=RS_19._col0(Inner),Output:["_col3","_col7","_col8","_col9"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_11] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_101] (rows=45476640 width=15) + Conds:RS_15._col1=RS_16._col0(Inner),Output:["_col2","_col3","_col7"] + <-Map 13 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_8] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_100] (rows=41342400 width=15) + Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2","_col3"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_12] + PartitionCols:_col0 + Please refer to the previous Select Operator [SEL_2] + <-Map 12 [SIMPLE_EDGE] + SHUFFLE [RS_13] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=18262 width=1119) + Output:["_col0"] + Filter Operator [FIL_91] (rows=18262 width=1119) + predicate:((d_moy = 4) and (d_year = 1999) and d_date_sk is not null) + Please refer to the previous TableScan [TS_3] PREHOOK: query: with inv as (select w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy diff --git a/ql/src/test/results/clientpositive/perf/tez/query44.q.out b/ql/src/test/results/clientpositive/perf/tez/query44.q.out index b982de041f..e8bf377205 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query44.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query44.q.out @@ -135,12 +135,12 @@ Stage-0 Select Operator [SEL_25] (rows=71999454 width=88) Output:["_col0"] Group By Operator [GBY_24] (rows=71999454 width=88) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + 
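query39 above is the most involved case: a variance-family aggregate is reduced, not just avg. The old plan kept `stddev_samp(...)` and `avg(...)` as opaque aggregates; the rewritten plan ships only `sum(x)`, `count(x)` and `sum(x*x)` partials and reconstructs the statistic inside the Filter Operator predicate as `power((sum(x*x) - sum(x)*sum(x)/n) / (n - 1), 0.5)`, with one CASE guard yielding null when n = 1 and another short-circuiting the coefficient-of-variation test when the mean is 0. A standalone sketch of that reconstruction checked against a two-pass reference — plain Java, numeric edge cases simplified:

```java
/** Sketch of the stddev_samp reconstruction visible in the rewritten
 *  query39 predicate: power((sum(x^2) - sum(x)^2/n) / (n - 1), 0.5). */
public class StddevSampDecomposition {

  /** stddev_samp from the partials sum(x), count(x), sum(x*x). */
  static Double stddevSampFromPartials(double sum, long n, double sumSq) {
    if (n <= 1) {
      return null;                        // CASE WHEN n = 1 THEN null
    }
    double variance = (sumSq - (sum * sum) / n) / (n - 1);
    return Math.pow(variance, 0.5);       // power(var, 0.5)
  }

  /** Reference two-pass computation for comparison. */
  static double stddevSampTwoPass(double[] xs) {
    double mean = 0;
    for (double x : xs) mean += x;
    mean /= xs.length;
    double ss = 0;
    for (double x : xs) ss += (x - mean) * (x - mean);
    return Math.sqrt(ss / (xs.length - 1));
  }

  public static void main(String[] args) {
    double[] xs = {3, 7, 7, 19};
    double sum = 0, sumSq = 0;
    for (double x : xs) { sum += x; sumSq += x * x; }

    double fromPartials = stddevSampFromPartials(sum, xs.length, sumSq);
    System.out.printf("%.6f vs %.6f%n", fromPartials, stddevSampTwoPass(xs));

    // The query39 filter then tests the coefficient of variation,
    // guarding against a zero mean exactly as the rewritten predicate does.
    double mean = sum / xs.length;
    boolean cov = mean == 0 ? false : (fromPartials / mean) > 1.0;
    System.out.println(cov);
  }
}
```

The single-pass formula is cheaper and mergeable, but numerically less stable than the two-pass one when values are large and tightly clustered; that trade-off is inherent to reducing the aggregate to sums.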
Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 12 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 Group By Operator [GBY_22] (rows=143998908 width=88) - Output:["_col0","_col1"],aggregations:["avg(_col1)"],keys:410 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col1)","count(_col1)"],keys:410 Select Operator [SEL_20] (rows=143998908 width=88) Output:["_col1"] Filter Operator [FIL_124] (rows=143998908 width=88) @@ -149,19 +149,21 @@ Stage-0 default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_hdemo_sk","ss_store_sk","ss_net_profit"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_35] - Group By Operator [GBY_31] (rows=143998908 width=88) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 - <-Map 14 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Group By Operator [GBY_29] (rows=287997817 width=88) - Output:["_col0","_col1"],aggregations:["avg(ss_net_profit)"],keys:ss_item_sk - Select Operator [SEL_28] (rows=287997817 width=88) - Output:["ss_item_sk","ss_net_profit"] - Filter Operator [FIL_125] (rows=287997817 width=88) - predicate:(ss_store_sk = 410) - TableScan [TS_26] (rows=575995635 width=88) - default@store_sales,ss1,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] + Select Operator [SEL_32] (rows=143998908 width=88) + Output:["_col0","_col1"] + Group By Operator [GBY_31] (rows=143998908 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_30] + PartitionCols:_col0 + Group By Operator [GBY_29] (rows=287997817 width=88) + Output:["_col0","_col1","_col2"],aggregations:["sum(ss_net_profit)","count(ss_net_profit)"],keys:ss_item_sk + Select Operator [SEL_28] (rows=287997817 width=88) + Output:["ss_item_sk","ss_net_profit"] + Filter Operator [FIL_125] (rows=287997817 width=88) + predicate:(ss_store_sk = 410) + TableScan [TS_26] (rows=575995635 width=88) + default@store_sales,ss1,Tbl:COMPLETE,Col:NONE,Output:["ss_item_sk","ss_store_sk","ss_net_profit"] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_33] Select Operator [SEL_17] (rows=1 width=8) diff --git a/ql/src/test/results/clientpositive/perf/tez/query6.q.out b/ql/src/test/results/clientpositive/perf/tez/query6.q.out index 1cd69f755a..4e5236a719 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query6.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query6.q.out @@ -142,12 +142,12 @@ Stage-0 Select Operator [SEL_29] (rows=231000 width=1436) Output:["_col0","_col1","_col2"] Group By Operator [GBY_28] (rows=231000 width=1436) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Map 18 [SIMPLE_EDGE] SHUFFLE [RS_27] PartitionCols:_col0 Group By Operator [GBY_26] (rows=462000 width=1436) - Output:["_col0","_col1"],aggregations:["avg(i_current_price)"],keys:i_category + Output:["_col0","_col1","_col2"],aggregations:["sum(i_current_price)","count(i_current_price)"],keys:i_category Filter Operator [FIL_105] (rows=462000 width=1436) predicate:i_category is not null TableScan [TS_23] (rows=462000 width=1436) diff --git a/ql/src/test/results/clientpositive/perf/tez/query65.q.out b/ql/src/test/results/clientpositive/perf/tez/query65.q.out index 0091ad022e..9fa189d1fc 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query65.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/query65.q.out @@ -142,7 +142,7 @@ Stage-0 Select Operator [SEL_32] (rows=158398803 width=88) Output:["_col0","_col1"] Group By Operator [GBY_31] (rows=158398803 width=88) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 Select Operator [SEL_27] (rows=316797606 width=88) Output:["_col1","_col2"] Group By Operator [GBY_26] (rows=316797606 width=88) diff --git a/ql/src/test/results/clientpositive/perf/tez/query7.q.out b/ql/src/test/results/clientpositive/perf/tez/query7.q.out index 00628dbce9..cfe4044052 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query7.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query7.q.out @@ -60,73 +60,75 @@ Stage-0 Output:["_col0","_col1","_col2","_col3","_col4"] <-Reducer 6 [SIMPLE_EDGE] SHUFFLE [RS_32] - Group By Operator [GBY_30] (rows=421657640 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)","avg(VALUE._col3)"],keys:KEY._col0 - <-Reducer 5 [SIMPLE_EDGE] - SHUFFLE [RS_29] - PartitionCols:_col0 - Group By Operator [GBY_28] (rows=843315281 width=88) - Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["avg(_col4)","avg(_col5)","avg(_col7)","avg(_col6)"],keys:_col18 - Merge Join Operator [MERGEJOIN_58] (rows=843315281 width=88) - Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_14] (rows=462000 width=1436) - Output:["_col0","_col1"] - Filter Operator [FIL_54] (rows=462000 width=1436) - predicate:i_item_sk is not null - TableScan [TS_12] (rows=462000 width=1436) - default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_24] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_57] (rows=766650239 width=88) - Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=2300 width=1179) - Output:["_col0"] - Filter Operator [FIL_53] (rows=2300 width=1179) - predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) - TableScan [TS_9] (rows=2300 width=1179) - default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_email","p_channel_event"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col3 - Merge Join Operator [MERGEJOIN_56] (rows=696954748 width=88) - Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 9 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col0 - Select Operator [SEL_8] (rows=36524 width=1119) - Output:["_col0"] - Filter Operator [FIL_52] (rows=36524 width=1119) - predicate:((d_year = 1998) and d_date_sk is not null) - TableScan [TS_6] (rows=73049 width=1119) - default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_55] (rows=633595212 width=88) - Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col2 - Select Operator [SEL_2] (rows=575995635 width=88) - Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] - Filter 
Operator [FIL_50] (rows=575995635 width=88) - predicate:(ss_cdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null) - TableScan [TS_0] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_16] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=232725 width=385) - Output:["_col0"] - Filter Operator [FIL_51] (rows=232725 width=385) - predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) - TableScan [TS_3] (rows=1861800 width=385) - default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] + Select Operator [SEL_31] (rows=421657640 width=88) + Output:["_col0","_col1","_col2","_col3","_col4"] + Group By Operator [GBY_30] (rows=421657640 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","count(VALUE._col7)"],keys:KEY._col0 + <-Reducer 5 [SIMPLE_EDGE] + SHUFFLE [RS_29] + PartitionCols:_col0 + Group By Operator [GBY_28] (rows=843315281 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col4)","count(_col4)","sum(_col5)","count(_col5)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col18 + Merge Join Operator [MERGEJOIN_58] (rows=843315281 width=88) + Conds:RS_24._col1=RS_25._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col18"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_14] (rows=462000 width=1436) + Output:["_col0","_col1"] + Filter Operator [FIL_54] (rows=462000 width=1436) + predicate:i_item_sk is not null + TableScan [TS_12] (rows=462000 width=1436) + default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_item_id"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_24] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_57] (rows=766650239 width=88) + Conds:RS_21._col3=RS_22._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=2300 width=1179) + Output:["_col0"] + Filter Operator [FIL_53] (rows=2300 width=1179) + predicate:(((p_channel_email = 'N') or (p_channel_event = 'N')) and p_promo_sk is not null) + TableScan [TS_9] (rows=2300 width=1179) + default@promotion,promotion,Tbl:COMPLETE,Col:NONE,Output:["p_promo_sk","p_channel_email","p_channel_event"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col3 + Merge Join Operator [MERGEJOIN_56] (rows=696954748 width=88) + Conds:RS_18._col0=RS_19._col0(Inner),Output:["_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 9 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col0 + Select Operator [SEL_8] (rows=36524 width=1119) + Output:["_col0"] + Filter Operator [FIL_52] (rows=36524 width=1119) + predicate:((d_year = 1998) and d_date_sk is not null) + TableScan [TS_6] (rows=73049 width=1119) + default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_year"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Merge Join Operator [MERGEJOIN_55] (rows=633595212 
width=88) + Conds:RS_15._col2=RS_16._col0(Inner),Output:["_col0","_col1","_col3","_col4","_col5","_col6","_col7"] + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col2 + Select Operator [SEL_2] (rows=575995635 width=88) + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + Filter Operator [FIL_50] (rows=575995635 width=88) + predicate:(ss_cdemo_sk is not null and ss_item_sk is not null and ss_promo_sk is not null and ss_sold_date_sk is not null) + TableScan [TS_0] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_cdemo_sk","ss_promo_sk","ss_quantity","ss_list_price","ss_sales_price","ss_coupon_amt"] + <-Map 8 [SIMPLE_EDGE] + SHUFFLE [RS_16] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=232725 width=385) + Output:["_col0"] + Filter Operator [FIL_51] (rows=232725 width=385) + predicate:((cd_education_status = 'Primary') and (cd_gender = 'F') and (cd_marital_status = 'W') and cd_demo_sk is not null) + TableScan [TS_3] (rows=1861800 width=385) + default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:NONE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status"] diff --git a/ql/src/test/results/clientpositive/perf/tez/query81.q.out b/ql/src/test/results/clientpositive/perf/tez/query81.q.out index 5fb04b29f7..f5d20edfc8 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query81.q.out @@ -105,7 +105,7 @@ Stage-0 Select Operator [SEL_50] (rows=11000000 width=1014) Output:["_col0","_col1","_col2"] Group By Operator [GBY_49] (rows=11000000 width=1014) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col0 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col0 Select Operator [SEL_45] (rows=22000000 width=1014) Output:["_col0","_col2"] Group By Operator [GBY_44] (rows=22000000 width=1014) diff --git a/ql/src/test/results/clientpositive/perf/tez/query85.q.out b/ql/src/test/results/clientpositive/perf/tez/query85.q.out index abba10da7a..b7e41d8616 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -192,12 +192,12 @@ Stage-0 Select Operator [SEL_51] (rows=1023990 width=385) Output:["_col4","_col5","_col6","_col7"] Group By Operator [GBY_50] (rows=1023990 width=385) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(VALUE._col0)","avg(VALUE._col1)","avg(VALUE._col2)"],keys:KEY._col0 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)","sum(VALUE._col2)","count(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)"],keys:KEY._col0 <-Reducer 8 [SIMPLE_EDGE] SHUFFLE [RS_49] PartitionCols:_col0 Group By Operator [GBY_48] (rows=2047980 width=385) - Output:["_col0","_col1","_col2","_col3"],aggregations:["avg(_col12)","avg(_col7)","avg(_col6)"],keys:_col22 + Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["sum(_col12)","count(_col12)","sum(_col7)","count(_col7)","sum(_col6)","count(_col6)"],keys:_col22 Merge Join Operator [MERGEJOIN_106] (rows=2047980 width=385) Conds:RS_44._col3, _col24, _col25=RS_45._col0, _col1, _col2(Inner),Output:["_col6","_col7","_col12","_col22"] <-Map 16 [SIMPLE_EDGE] diff --git a/ql/src/test/results/clientpositive/perf/tez/query9.q.out b/ql/src/test/results/clientpositive/perf/tez/query9.q.out index 24de6f2e3a..8df7ed90ba 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/query9.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query9.q.out @@ -151,69 +151,73 @@ Stage-0 Stage-1 Reducer 16 File Output Operator [FS_154] - Select Operator [SEL_153] (rows=36 width=3135) + Select Operator [SEL_153] (rows=36 width=1455) Output:["_col0","_col1","_col2","_col3","_col4"] - Merge Join Operator [MERGEJOIN_185] (rows=36 width=3135) + Merge Join Operator [MERGEJOIN_185] (rows=36 width=1455) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15"] <-Reducer 15 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_150] - Merge Join Operator [MERGEJOIN_184] (rows=36 width=2846) + Merge Join Operator [MERGEJOIN_184] (rows=36 width=1334) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14"] <-Reducer 14 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_147] - Merge Join Operator [MERGEJOIN_183] (rows=36 width=2557) + Merge Join Operator [MERGEJOIN_183] (rows=36 width=1213) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"] <-Reducer 13 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_144] - Merge Join Operator [MERGEJOIN_182] (rows=36 width=2548) + Merge Join Operator [MERGEJOIN_182] (rows=36 width=1204) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"] <-Reducer 12 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_141] - Merge Join Operator [MERGEJOIN_181] (rows=36 width=2259) + Merge Join Operator [MERGEJOIN_181] (rows=36 width=1083) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] <-Reducer 11 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_138] - Merge Join Operator [MERGEJOIN_180] (rows=36 width=1970) + Merge Join Operator [MERGEJOIN_180] (rows=36 width=962) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10"] <-Reducer 10 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_135] - Merge Join Operator [MERGEJOIN_179] (rows=36 width=1961) + Merge Join Operator [MERGEJOIN_179] (rows=36 width=953) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"] <-Reducer 34 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_133] - Group By Operator [GBY_64] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_63] - Group By Operator [GBY_62] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_net_paid_inc_tax)"] - Select Operator [SEL_61] (rows=63999515 width=88) - Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_164] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 41 AND 60 - TableScan [TS_80] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_net_paid_inc_tax"] + Select Operator [SEL_65] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_64] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_63] + Group By Operator [GBY_62] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator 
[SEL_61] (rows=63999515 width=88) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_164] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 41 AND 60 + TableScan [TS_80] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_net_paid_inc_tax"] <-Reducer 9 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_132] - Merge Join Operator [MERGEJOIN_178] (rows=36 width=1672) + Merge Join Operator [MERGEJOIN_178] (rows=36 width=832) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] <-Reducer 28 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_130] - Group By Operator [GBY_57] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_56] - Group By Operator [GBY_55] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_ext_list_price)"] - Select Operator [SEL_54] (rows=63999515 width=88) - Output:["ss_ext_list_price"] - Filter Operator [FIL_163] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 41 AND 60 - TableScan [TS_73] (rows=575995635 width=88) - default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_ext_list_price"] + Select Operator [SEL_58] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_57] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_56] + Group By Operator [GBY_55] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_54] (rows=63999515 width=88) + Output:["ss_ext_list_price"] + Filter Operator [FIL_163] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 41 AND 60 + TableScan [TS_73] (rows=575995635 width=88) + default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity","ss_ext_list_price"] <-Reducer 8 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_129] - Merge Join Operator [MERGEJOIN_177] (rows=36 width=1383) + Merge Join Operator [MERGEJOIN_177] (rows=36 width=711) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 22 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_127] @@ -230,41 +234,45 @@ Stage-0 default@store_sales,store_sales,Tbl:COMPLETE,Col:NONE,Output:["ss_quantity"] <-Reducer 7 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_126] - Merge Join Operator [MERGEJOIN_176] (rows=36 width=1374) + Merge Join Operator [MERGEJOIN_176] (rows=36 width=702) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 33 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_124] - Group By Operator [GBY_43] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_42] - Group By Operator [GBY_41] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_net_paid_inc_tax)"] - Select Operator [SEL_40] (rows=63999515 width=88) - Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_161] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 21 AND 40 - Please refer to the previous TableScan [TS_80] + Select Operator [SEL_44] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_43] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_42] + Group By Operator [GBY_41] (rows=1 width=120) + 
Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_40] (rows=63999515 width=88) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_161] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 21 AND 40 + Please refer to the previous TableScan [TS_80] <-Reducer 6 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_123] - Merge Join Operator [MERGEJOIN_175] (rows=36 width=1085) + Merge Join Operator [MERGEJOIN_175] (rows=36 width=581) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4","_col5"] <-Reducer 27 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_121] - Group By Operator [GBY_36] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_35] - Group By Operator [GBY_34] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_ext_list_price)"] - Select Operator [SEL_33] (rows=63999515 width=88) - Output:["ss_ext_list_price"] - Filter Operator [FIL_160] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 21 AND 40 - Please refer to the previous TableScan [TS_73] + Select Operator [SEL_37] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_36] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_35] + Group By Operator [GBY_34] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_33] (rows=63999515 width=88) + Output:["ss_ext_list_price"] + Filter Operator [FIL_160] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 21 AND 40 + Please refer to the previous TableScan [TS_73] <-Reducer 5 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_120] - Merge Join Operator [MERGEJOIN_174] (rows=36 width=796) + Merge Join Operator [MERGEJOIN_174] (rows=36 width=460) Conds:(Left Outer),Output:["_col1","_col2","_col3","_col4"] <-Reducer 21 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_118] @@ -280,11 +288,11 @@ Stage-0 Please refer to the previous TableScan [TS_66] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_117] - Merge Join Operator [MERGEJOIN_173] (rows=36 width=787) + Merge Join Operator [MERGEJOIN_173] (rows=36 width=451) Conds:(Left Outer),Output:["_col1","_col2","_col3"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_114] - Merge Join Operator [MERGEJOIN_172] (rows=36 width=498) + Merge Join Operator [MERGEJOIN_172] (rows=36 width=330) Conds:(Left Outer),Output:["_col1","_col2"] <-Reducer 2 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_111] @@ -311,30 +319,34 @@ Stage-0 Please refer to the previous TableScan [TS_66] <-Reducer 26 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_112] - Group By Operator [GBY_15] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_14] - Group By Operator [GBY_13] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_ext_list_price)"] - Select Operator [SEL_12] (rows=63999515 width=88) - Output:["ss_ext_list_price"] - Filter Operator [FIL_157] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 1 AND 20 - Please refer to the previous TableScan [TS_73] + Select Operator [SEL_16] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_15] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] + 
PARTITION_ONLY_SHUFFLE [RS_14] + Group By Operator [GBY_13] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_12] (rows=63999515 width=88) + Output:["ss_ext_list_price"] + Filter Operator [FIL_157] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 1 AND 20 + Please refer to the previous TableScan [TS_73] <-Reducer 32 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_115] - Group By Operator [GBY_22] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_21] - Group By Operator [GBY_20] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_net_paid_inc_tax)"] - Select Operator [SEL_19] (rows=63999515 width=88) - Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_158] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 1 AND 20 - Please refer to the previous TableScan [TS_80] + Select Operator [SEL_23] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_22] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_21] + Group By Operator [GBY_20] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_19] (rows=63999515 width=88) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_158] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 1 AND 20 + Please refer to the previous TableScan [TS_80] <-Reducer 18 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_136] Group By Operator [GBY_71] (rows=1 width=8) @@ -349,30 +361,34 @@ Stage-0 Please refer to the previous TableScan [TS_66] <-Reducer 24 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_139] - Group By Operator [GBY_78] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_77] - Group By Operator [GBY_76] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_ext_list_price)"] - Select Operator [SEL_75] (rows=63999515 width=88) - Output:["ss_ext_list_price"] - Filter Operator [FIL_166] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 61 AND 80 - Please refer to the previous TableScan [TS_73] + Select Operator [SEL_79] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_78] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_77] + Group By Operator [GBY_76] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_75] (rows=63999515 width=88) + Output:["ss_ext_list_price"] + Filter Operator [FIL_166] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 61 AND 80 + Please refer to the previous TableScan [TS_73] <-Reducer 30 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_142] - Group By Operator [GBY_85] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_84] - Group By Operator [GBY_83] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_net_paid_inc_tax)"] - Select Operator [SEL_82] (rows=63999515 width=88) - Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_167] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 61 AND 80 - Please refer to the previous TableScan [TS_80] 
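The removed avg-based subtree above and the added sum/count subtree below are the visible effect of HiveAggregateReduceFunctionsRule on these golden files: avg(x) becomes sum(x) and count(x) followed by a division in a new Select Operator, and the variance/stddev family (see the later dynamic_rdd_cache.q.out and groupby3*.q.out hunks) additionally carries sum(x) and sum(x*x) partials. A minimal Java sketch of the arithmetic the reduced plans evaluate, under hypothetical helper names that are not code from this patch (n = count(x), s = sum(x), ss = sum(x*x)):

    // Hypothetical sketch (not part of this patch): how the rewritten plans
    // recombine per-group partials n = count(x), s = sum(x), ss = sum(x * x).
    final class ReducedAggSketch {
      static Double avg(double s, long n) {
        return n == 0 ? null : s / n;                   // (_colSum / _colCount)
      }
      static Double varPop(double ss, double s, long n) {
        return n == 0 ? null : (ss - (s * s) / n) / n;  // ((ss - s*s/n) / n)
      }
      static Double varSamp(double ss, double s, long n) {
        // Mirrors CASE WHEN ((n = 1L)) THEN (null) ELSE ((n - 1)) END in the plans.
        return n <= 1 ? null : (ss - (s * s) / n) / (n - 1);
      }
      static Double stddevPop(double ss, double s, long n) {
        Double v = varPop(ss, s, n);
        return v == null ? null : Math.pow(v, 0.5);     // power(var_pop, 0.5)
      }
      static Double stddevSamp(double ss, double s, long n) {
        Double v = varSamp(ss, s, n);
        return v == null ? null : Math.pow(v, 0.5);     // power(var_samp, 0.5)
      }
    }

For example, the stddev_samp expression materialized in the groupby3_map.q.out hunk further below, power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5), corresponds to stddevSamp(ss=_col6, s=_col7, n=_col1); the zero-count guards are made explicit in the sketch where the plans instead rely on SQL null semantics.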
+ Select Operator [SEL_86] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_85] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_84] + Group By Operator [GBY_83] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_82] (rows=63999515 width=88) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_167] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 61 AND 80 + Please refer to the previous TableScan [TS_80] <-Reducer 19 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_145] Group By Operator [GBY_92] (rows=1 width=8) @@ -387,28 +403,32 @@ Stage-0 Please refer to the previous TableScan [TS_66] <-Reducer 25 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_148] - Group By Operator [GBY_99] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 23 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_98] - Group By Operator [GBY_97] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_ext_list_price)"] - Select Operator [SEL_96] (rows=63999515 width=88) - Output:["ss_ext_list_price"] - Filter Operator [FIL_169] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 81 AND 100 - Please refer to the previous TableScan [TS_73] + Select Operator [SEL_100] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_99] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 23 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_98] + Group By Operator [GBY_97] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_ext_list_price)","count(ss_ext_list_price)"] + Select Operator [SEL_96] (rows=63999515 width=88) + Output:["ss_ext_list_price"] + Filter Operator [FIL_169] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 81 AND 100 + Please refer to the previous TableScan [TS_73] <-Reducer 31 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_151] - Group By Operator [GBY_106] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 29 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_105] - Group By Operator [GBY_104] (rows=1 width=288) - Output:["_col0"],aggregations:["avg(ss_net_paid_inc_tax)"] - Select Operator [SEL_103] (rows=63999515 width=88) - Output:["ss_net_paid_inc_tax"] - Filter Operator [FIL_170] (rows=63999515 width=88) - predicate:ss_quantity BETWEEN 81 AND 100 - Please refer to the previous TableScan [TS_80] + Select Operator [SEL_107] (rows=1 width=120) + Output:["_col0"] + Group By Operator [GBY_106] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 29 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_105] + Group By Operator [GBY_104] (rows=1 width=120) + Output:["_col0","_col1"],aggregations:["sum(ss_net_paid_inc_tax)","count(ss_net_paid_inc_tax)"] + Select Operator [SEL_103] (rows=63999515 width=88) + Output:["ss_net_paid_inc_tax"] + Filter Operator [FIL_170] (rows=63999515 width=88) + predicate:ss_quantity BETWEEN 81 AND 100 + Please refer to the previous TableScan [TS_80] diff --git a/ql/src/test/results/clientpositive/perf/tez/query92.q.out b/ql/src/test/results/clientpositive/perf/tez/query92.q.out index 6009fdf171..5118ebe1d2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/query92.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/query92.q.out @@ -135,11 +135,11 @@ 
Stage-0 Select Operator [SEL_19] (rows=79201469 width=135) Output:["_col0","_col1"] Group By Operator [GBY_18] (rows=79201469 width=135) - Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0 + Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_17] PartitionCols:_col0 Group By Operator [GBY_16] (rows=158402938 width=135) - Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1 + Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1 Please refer to the previous Merge Join Operator [MERGEJOIN_61] diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index cf78358e27..3faa06a633 100644 --- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -925,35 +925,39 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col2), avg(_col2) - keys: _col5 (type: int), _col4 (type: int), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: _col6 (type: string), _col5 (type: int), _col4 (type: int), _col2 (type: int), UDFToDouble(_col2) (type: double), (UDFToDouble(_col2) * UDFToDouble(_col2)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Group By Operator + aggregations: sum(_col3), count(_col3), sum(_col5), sum(_col4) + keys: _col1 (type: int), _col2 (type: int), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double) Reducer 15 Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: 
Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col4 = 0.0D)) THEN (false) ELSE (((_col3 / _col4) > 1.0D)) END (type: boolean) + predicate: CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0D)) THEN (null) ELSE ((_col3 / _col4)) END (type: double) + expressions: _col0 (type: int), _col1 (type: int), (_col3 / _col4) (type: double), CASE WHEN (((_col3 / _col4) = 0)) THEN (null) ELSE ((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4))) END (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator @@ -1004,35 +1008,39 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col2, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: stddev_samp(_col2), avg(_col2) - keys: _col5 (type: int), _col4 (type: int), _col6 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: _col6 (type: string), _col5 (type: int), _col4 (type: int), _col2 (type: int), UDFToDouble(_col2) (type: double), (UDFToDouble(_col2) * UDFToDouble(_col2)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Group By Operator + aggregations: sum(_col3), count(_col3), sum(_col5), sum(_col4) + keys: _col1 (type: int), _col2 (type: int), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), avg(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col3 (type: double), _col4 (type: double) - outputColumnNames: _col1, _col2, _col3, _col4 + expressions: _col0 (type: int), _col1 (type: int), _col3 (type: bigint), _col4 (type: 
bigint), _col5 (type: double), _col6 (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: CASE WHEN ((_col4 = 0.0D)) THEN (false) ELSE (((_col3 / _col4) > 1.0D)) END (type: boolean) + predicate: CASE WHEN (((_col3 / _col4) = 0)) THEN (false) ELSE (((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4)) > 1.0D)) END (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator - expressions: _col1 (type: int), _col2 (type: int), _col4 (type: double), CASE WHEN ((_col4 = 0.0D)) THEN (null) ELSE ((_col3 / _col4)) END (type: double) + expressions: _col0 (type: int), _col1 (type: int), (_col3 / _col4) (type: double), CASE WHEN (((_col3 / _col4) = 0)) THEN (null) ELSE ((power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) / (_col3 / _col4))) END (type: double) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out b/ql/src/test/results/clientpositive/spark/groupby3.q.out index c314763db2..7bdc3a0b9c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out @@ -51,13 +51,13 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: substr(value, 5) (type: string) sort order: + - Map-reduce partition columns: _col0 (type: string) + Map-reduce partition columns: substr(value, 5) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out index 9caf47b676..0d5f352122 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out @@ -50,34 +50,34 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) + aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -128,7 +128,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 
EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out index 678276115f..d5974ce57f 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out @@ -54,34 +54,34 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0) + aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), 
power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -135,10 +135,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -153,4 +153,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876 20469.01089779559 79136.0 309.0 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out index 2f101601fe..c7c8d6cb3e 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -51,12 +51,12 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: value (type: string) + outputColumnNames: value Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0) - keys: _col0 (type: string) + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), 
variance(substr(value, 5)), var_samp(substr(value, 5)) + keys: substr(value, 5) (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out index d07eea5fe6..904fcf394e 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out @@ -50,27 +50,28 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col1 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0) + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -121,7 +122,7 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -136,4 +137,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out index 8bcb912991..1dc870d796 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out @@ -54,27 +54,28 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: substr(value, 5) (type: string) - outputColumnNames: _col0 + expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double), _col1 (type: double) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0) + aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1) mode: complete - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: 
double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double) + expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -128,10 +129,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] @@ -146,4 +147,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 79136.0 309.0 +130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0 diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index 82ba94a52b..19d28c5a42 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -283,10 +283,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) @@ -294,25 +294,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out index 62c26e6224..650d2e08a5 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown2.q.out @@ -29,10 +29,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -40,25 +40,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -129,10 +133,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -140,25 +144,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -229,10 +237,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, 
_col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -240,25 +248,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -329,10 +341,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: string), _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -340,25 +352,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -429,10 +445,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: string), _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -440,25 +456,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -529,10 +549,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col1 (type: string), _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data 
size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) @@ -540,25 +560,29 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col2 (type: struct) + value expressions: _col2 (type: double), _col3 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -854,30 +878,34 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) keys: _col0 (type: string) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: double), _col0 (type: string) - sort order: +- + Select Operator + expressions: _col0 (type: string), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 + Reduce Output Operator + key expressions: _col1 (type: double), _col0 (type: string) + sort order: +- + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory 
Usage: 0.3 Reducer 3 Reduce Operator Tree: Select Operator @@ -965,28 +993,28 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct) + value expressions: _col3 (type: double), _col4 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double) + expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator @@ -1050,28 +1078,28 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col2) + aggregations: sum(_col2), count(_col2) keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: struct) + value expressions: _col3 (type: double), _col4 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) mode: mergepartial - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col3, _col4 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE pruneGroupingSetId: true Select Operator - expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double) + expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out 
b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index f0318a3c6c..7d23ba8f22 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -403,18 +403,18 @@ STAGE PLANS: alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint + expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -426,22 +426,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) 
/ _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -452,12 +458,12 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -497,7 +503,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +-4.344925324321378 1158.3003004768175 1158.3003004768175 1158.426587033782 34.03381113652741 34.03381113652741 34.03381113652741 34.03566639620535 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), @@ -903,18 +909,18 @@ STAGE PLANS: alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint + expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 640 Basic 
stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -926,22 +932,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -952,12 +964,12 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -997,7 +1009,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 +-1.8515862077935246E8 2.07689300543066035E18 2.07689300543066035E18 2.07711944383072922E18 1.441142951074133E9 1.441142951074133E9 1.441142951074133E9 1.4412215110213728E9 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), @@ -1403,18 +1415,18 @@ STAGE PLANS: alias: alltypesparquet Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat + expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -1426,22 +1438,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), 
sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -1452,12 +1470,12 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1497,7 +1515,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +-4.303895780321011 1163.8972588605056 1163.8972588605056 1164.0241556397098 34.11593848717203 34.11593848717203 34.11593848717203 34.11779822379677 WARNING: Comparing a bigint and a double may result in a loss of precision. 
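
The hunks above and below all apply the same rewrite: avg(x) becomes sum(x)/count(x); var_pop(x) becomes (sum(x*x) - sum(x)^2/count(x)) / count(x); var_samp(x) reuses that numerator over CASE WHEN count(x) = 1 THEN null ELSE count(x) - 1 END; and std/stddev/stddev_pop/stddev_samp are power(variance, 0.5). Because the partial aggregates are now plain sums and counts over primitive doubles and bigints rather than a variance struct (visible as the struct-typed value expressions being removed above), Reducer 2 flips from notVectorizedReason to vectorized: true using the native VectorUDAFSum*/VectorUDAFCountMerge aggregators. Below is a minimal standalone sketch of that decomposition — hypothetical Java, not Hive code; the class name and generated data are invented — which also shows why the masked result rows change only in their trailing digits: the pre-patch GenericUDAFVarianceEvaluator accumulates variance incrementally, while the rewritten plan evaluates the sum-of-squares identity, so the two round differently even though they compute the same statistic.

import java.util.Random;

/** Hypothetical sketch of the sum/count decomposition applied by the rule. */
public class VarianceDecompositionSketch {
  public static void main(String[] args) {
    double[] xs = new double[12288];                 // same row count as alltypesparquet
    Random rnd = new Random(42);
    for (int i = 0; i < xs.length; i++) {
      xs[i] = rnd.nextInt(256) - 128;                // stand-in for a tinyint column
    }

    // Partial aggregates produced by the map-side Group By Operator.
    long n = 0;
    double sum = 0.0;                                // sum(_col0)
    double sumSq = 0.0;                              // sum(_col0 * _col0)
    for (double x : xs) {
      n++;
      sum += x;
      sumSq += x * x;
    }

    // Final expressions, matching the Select Operator in the new plans.
    double avg = sum / n;                            // (_col0 / _col1)
    double varPop = (sumSq - (sum * sum) / n) / n;   // ((_col2 - ((_col3 * _col3) / _col1)) / _col1)
    Double varSamp = (n == 1L) ? null                // CASE WHEN ((_col1 = 1L)) THEN (null)
        : (sumSq - (sum * sum) / n) / (n - 1);       // ELSE ((_col1 - 1)) END
    double stddevPop = Math.pow(varPop, 0.5);        // power(var_pop, 0.5)

    // The same statistic accumulated in a different order: mathematically
    // equal to varPop, but floating-point rounding differs in the last few
    // digits -- the same kind of drift the updated golden outputs show.
    double acc = 0.0;
    for (double x : xs) {
      acc += (x - avg) * (x - avg);
    }
    double varPopReordered = acc / n;

    System.out.printf("avg=%s var_pop=%s var_samp=%s stddev_pop=%s%n",
        avg, varPop, varSamp, stddevPop);
    System.out.printf("identity form=%.17g vs reordered form=%.17g%n",
        varPop, varPopReordered);
  }
}

The CASE WHEN guard matters for correctness, not just style: with a single input row, var_samp and stddev_samp must yield NULL instead of dividing by zero, which is why every rewritten plan carries the CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END subexpression.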
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), @@ -1584,25 +1602,26 @@ STAGE PLANS: predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cfloat + expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 4] + projectedOutputColumnNums: [3, 4, 0, 14, 17] + selectExpressions: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 16:double) -> 17:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -1610,9 +1629,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: 
bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -1628,26 +1647,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double] + scratchColumnTypeNames: [decimal(13,3), double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), min(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinLong(col 6:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0D + _col0) (type: double), _col1 (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) + (-6432.0D + _col0)) (type: double), _col2 (type: double), (- (-6432.0D + _col0)) (type: double), (-6432.0D + (- (-6432.0D + _col0))) (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) / (- (-6432.0D + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0D + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + expressions: (_col0 / _col1) (type: double), (- (_col0 / _col1)) (type: double), (-6432.0D + (_col0 / _col1)) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) + (-6432.0D + (_col0 / _col1))) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) 
THEN (null) ELSE ((_col1 - 1)) END) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), (-6432.0D + (- (-6432.0D + (_col0 / _col1)))) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) / (- (-6432.0D + (_col0 / _col1)))) (type: double), _col4 (type: bigint), _col5 (type: double), (((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) % power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5)) (type: double), (- ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), ((- (-6432.0D + (_col0 / _col1))) * (- (_col0 / _col1))) (type: double), _col6 (type: tinyint), (- _col6) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [7, 9, 10, 8, 11, 13, 14, 12, 19, 18, 22, 4, 5, 25, 20, 28, 6, 27] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 8:double) -> 9:double, DoubleScalarAddDoubleColumn(val -6432.0, col 8:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 8:double) -> 10:double, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 8:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 11:double)(children: DoubleColDivideLongColumn(col 8:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 8:double) -> 11:double) -> 8:double) -> 11:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 11:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double) -> 11:double, DoubleColAddDoubleColumn(col 12:double, col 14:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double) -> 12:double, DoubleScalarAddDoubleColumn(val -6432.0, col 13:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 13:double) -> 14:double) -> 13:double, DoubleColDivideLongColumn(col 12:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 12:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 12:double) -> 14:double) -> 12:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double, DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 18:double) -> 12:double, DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 19:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 
20:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 20:double) -> 18:double, DoubleColDivideDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 20:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 20:double) -> 21:double) -> 20:double, DoubleColUnaryMinus(col 22:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 21:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 22:double) -> 21:double) -> 22:double, DoubleColModuloDoubleColumn(col 21:double, col 20:double)(children: DoubleColDivideLongColumn(col 20:double, col 24:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double, IfExprNullCondExpr(col 17:boolean, null, col 23:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 23:bigint) -> 24:bigint) -> 21:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double, IfExprNullCondExpr(col 24:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 24:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 21:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 21:double, col 29:double)(children: DoubleColUnaryMinus(col 28:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 21:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 28:double) -> 21:double, DoubleColUnaryMinus(col 28:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 28:double) -> 29:double) -> 28:double, LongColUnaryMinus(col 6:tinyint) -> 27:tinyint + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1716,7 +1759,7 @@ WHERE (((cstring2 LIKE '%b%') POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 
1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593508E9 3.875716535945533E8 0.0 2.06347151720190515E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.051665108770714E9 -2.06347151720190515E18 1.5020929380914048E17 -64 64 PREHOOK: query: EXPLAIN extended select count(*) from alltypesparquet where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 18379f257b..010b19e354 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -71,25 +71,26 @@ STAGE PLANS: predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, cint, cfloat, cdouble + expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5] + projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data 
size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -97,9 +98,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -115,26 +116,50 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:tinyint, VALUE._col5:int, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), max(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), count(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFMaxLong(col 4:tinyint) -> tinyint, VectorUDAFMaxLong(col 5:int) -> int, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFCountMerge(col 9:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + 
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 / -26.28D) (type: double), _col1 (type: double), (-1.389D + _col1) (type: double), (_col1 * (-1.389D + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389D + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175D % (- (_col1 * (-1.389D + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) / -26.28D) (type: double), _col3 (type: double), (-1.389D + _col3) (type: double), (_col3 * (-1.389D + _col3)) (type: double), _col4 (type: tinyint), (- (_col3 * (-1.389D + _col3))) (type: double), _col5 (type: int), (CAST( _col5 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), ((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END) (type: double), (10.175D % (- (_col3 * (-1.389D + _col3)))) (type: double), _col9 (type: bigint), (-563 % _col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 10, 3, 12, 14, 4, 13, 5, 17, 18, 22, 9, 21] + selectExpressions: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 11:double) -> 10:double) -> 11:double, DoubleColDivideDoubleScalar(col 12:double, val -26.28)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 12:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 12:double) -> 10:double) -> 12:double) -> 10:double, DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 3:double, col 13:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 13:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 13:double) -> 15:double) -> 13:double, DecimalColMultiplyDecimalScalar(col 16:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 5:int) -> 16:decimal(10,0)) -> 17:decimal(16,3), DoubleColDivideLongColumn(col 15:double, col 21:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 18:double)(children: DoubleColDivideLongColumn(col 15:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 15:double) -> 18:double) -> 15:double, IfExprNullCondExpr(col 19:boolean, null, col 20:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 
19:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 20:bigint) -> 21:bigint) -> 18:double, DoubleScalarModuloDoubleColumn(val 10.175, col 15:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 15:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 15:double) -> 22:double) -> 15:double) -> 22:double, LongScalarModuloLongColumn(val -563, col 5:int) -> 21:int + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,4 +215,4 @@ WHERE (((cdouble > ctinyint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -1074.830257547229 -40.89917266161449 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620903E10 10.175 3745 -563 +1074.8302575472321 -40.899172661614614 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620917E10 10.175 3745 -563 diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index df85ce3c66..fd2947dd12 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -94,26 +94,27 @@ STAGE PLANS: predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean) Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) - outputColumnNames: cbigint, cdouble, cstring1, cboolean1 + expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 5, 6, 10] + projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) + aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: - aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 
3:bigint) -> struct aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop + aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4] - keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) @@ -124,9 +125,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] + valueColumnNums: [4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) + value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -142,27 +143,55 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:bigint, VALUE._col6:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: 
Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumLong(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double) + expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 
15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 20:double) -> 15:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 22:double, DecimalScalarAddDecimalColumn(val -5638.15, col 23:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 23:decimal(19,0)) -> 24:decimal(22,2), DoubleColDivideDoubleColumn(col 21:double, col 25:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 21:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 25:double) -> 26:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 25:double) -> 21:double) -> 25:double, DoubleColAddDoubleColumn(col 27:double, col 28:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 27:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 28:double) -> 21:double, FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] Statistics: Num rows: 1877 Data size: 22524 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 diff --git 
a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index e4db32c9f3..bce1f8a98f 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -96,26 +96,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > 11.0D) and (UDFToDouble(ctimestamp2) <> 12.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: 
float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -126,9 +127,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -144,27 +145,55 @@ STAGE PLANS: includeColumns: [0, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(11,4)] + scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 15 + dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:double, VALUE._col7:bigint, VALUE._col8:float, VALUE._col9:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 
10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: 
DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -419,26 +448,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > -1.388D) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: 
tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -449,7 +479,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), 
_col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -461,25 +491,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), 
(- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: 
double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1365 Data size: 16380 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index aef374a007..03afcc102f 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -96,27 +96,27 @@ STAGE PLANS: predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 4, 6, 10, 5, 14] - selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double + projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] + selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 14:double) -> struct aggregation: stddev_samp, VectorUDAFMaxDouble(col 4:float) 
-> float, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 14:double) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) @@ -127,9 +127,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumnNums: [5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) + value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -145,27 +145,55 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:float, VALUE._col4:double, VALUE._col5:double, 
VALUE._col6:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDouble(col 8:float) -> float, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175D) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] + selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 13:double) -> 15:double) -> 13:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 15:float, DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleColUnaryMinus(col 8:float) -> 20:float, DoubleColDivideDoubleScalar(col 22:double, val 10.175)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 21:double) -> 22:double) -> 21:double, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleScalar(col 24:double, val 10.175)(children: DoubleColUnaryMinus(col 23:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 23:double) -> 24:double) -> 23:double) -> 24:double, DoubleScalarModuloDoubleColumn(val -1.389, col 23:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 23:double) -> 25:double) -> 23:double, IfExprNullCondExpr(col 18:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 23:double) -> 25:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 23:double, DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColModuloDoubleScalar(col 30:double, val 10.175)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: 
DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 27:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 30:double) -> 34:double Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] Statistics: Num rows: 303 Data size: 3636 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 24cdf069f6..126cfd0795 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -92,26 +92,27 @@ STAGE PLANS: predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5, 6, 8, 10] + projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 
4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) @@ -122,9 +123,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4, 5, 6] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] + valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), 
_col11 (type: struct), _col12 (type: struct) + value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -140,7 +141,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double, double] Reducer 2 Reduce Vectorization: enabled: false @@ -148,13 +149,13 @@ STAGE PLANS: enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) + expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - 
((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out index a35c9c586f..303702cef0 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out @@ -69,39 +69,40 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumnNums: [3, 4, 5, 6] Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -117,26 +118,51 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 6:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:double, col 2:timestamp + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: 
double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] + selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, 
val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out index 8b3c5f2a2d..423d2e3f96 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out @@ -75,25 +75,26 @@ STAGE PLANS: predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble + expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 3, 4, 5] + projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double Statistics: Num rows: 4778 Data size: 57336 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -101,9 +102,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -119,26 +120,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:bigint, VALUE._col7:tinyint, VALUE._col8:double, VALUE._col9:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, 
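The hunks above show the rule's end-to-end effect on this plan: avg and var_pop are decomposed into sum/count (plus sum-of-squares) partials, so the values shipped to the reducer become primitive bigint/double columns instead of struct buffers, and the group-by that was previously rejected in FINAL mode ("var_pop ... not supported for evaluator GenericUDAFVarianceEvaluator") now vectorizes. A minimal sketch of the average finalization, using a hypothetical helper name rather than Hive code:

    // Illustrative only: recovers AVG from the split sum/count partials,
    // exactly like the plan's later (_col0 / _col1) projection.
    public class FinishAvg {
        static Double finishAvg(long sum, long count) {
            // An empty group has count == 0 and must yield NULL, like SQL AVG.
            return count == 0 ? null : (double) sum / count;
        }
        public static void main(String[] args) {
            System.out.println(finishAvg(10, 4));  // 2.5
            System.out.println(finishAvg(0, 0));   // null
        }
    }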
_col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 % -563.0D) (type: double), (_col0 + 762.0D) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0D) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) % -563.0D) (type: double), ((_col0 / _col1) + 762.0D) (type: double), _col2 (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), (- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) (type: double), (_col2 - (_col0 / _col1)) (type: double), _col6 (type: bigint), (- (_col2 - (_col0 / _col1))) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / _col5) - 762.0D) (type: double), _col7 (type: tinyint), ((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) (type: double), (_col8 / _col9) (type: double), (((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) - _col2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 12, 13, 2, 14, 11, 16, 6, 15, 17, 7, 20, 18, 19] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 10:double, DoubleColModuloDoubleScalar(col 11:double, val -563.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double, DoubleColAddDoubleScalar(col 11:double, val 762.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 11:double) -> 14:double) -> 11:double) -> 14:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 15:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 11:double) -> 15:double) -> 11:double) -> 15:double) -> 11:double, DoubleColSubtractDoubleColumn(col 2:double, col 15:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 15:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 15:double) -> 17:double) -> 15:double, DoubleColSubtractDoubleScalar(col 18:double, val 762.0)(children: DoubleColDivideLongColumn(col 17:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 17:double) -> 
18:double) -> 17:double) -> 18:double) -> 17:double, DoubleColAddDoubleColumn(col 18:double, col 19:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 18:double) -> 19:double) -> 18:double) -> 19:double) -> 18:double, CastLongToDouble(col 7:tinyint) -> 19:double) -> 20:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 18:double, DoubleColSubtractDoubleColumn(col 22:double, col 2:double)(children: DoubleColAddDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 19:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 21:double)(children: DoubleColDivideLongColumn(col 19:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 19:double) -> 21:double) -> 19:double) -> 21:double) -> 19:double, CastLongToDouble(col 7:tinyint) -> 21:double) -> 22:double) -> 19:double + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -198,4 +223,4 @@ WHERE (((ctimestamp1 < ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378778E18 -1.49936299222378778E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378701E18 -64 -1.49936299222378778E18 -5650.1297631138395 -1.49936299222378496E18 +-5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378906E18 -1.49936299222378906E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378829E18 -64 -1.49936299222378906E18 -5650.1297631138395 -1.49936299222378624E18 diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out index dd3532b4ff..955f85c327 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out @@ -80,25 +80,26 @@ STAGE PLANS: predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, csmallint, cint, cfloat + expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), 
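Every variance-family input gets the same map-side treatment visible in this Select: the value is pre-projected cast to double alongside its square, so the hash-side Group By only needs plain sum, sum-of-squares, and count aggregates. A rough model of that accumulator, with hypothetical names (not Hive's implementation):

    // Illustrative accumulator for the (x, x*x, n) partials the rewritten
    // map side produces for each stddev/var input column.
    public class VarPartial {
        double sum, sumSq;
        long n;
        void add(double x) { sum += x; sumSq += x * x; n++; }
        public static void main(String[] args) {
            VarPartial p = new VarPartial();
            for (double x : new double[] {1, 2, 3}) p.add(x);
            System.out.println(p.sum + " " + p.sumSq + " " + p.n); // 6.0 14.0 3
        }
    }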
(UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4] + projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double Statistics: Num rows: 2503 Data size: 30036 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 19:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -106,9 +107,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -124,26 +125,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)] + scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3), double, double, double, double, double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:double, VALUE._col10:bigint, VALUE._col11:bigint, VALUE._col12:double, VALUE._col13:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumLong(col 10:bigint) -> bigint, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFSumDouble(col 12:double) -> double, 
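On the merge side, the split aggregates need only two trivial combine rules, visible in the aggregator list here: partial sums add, and partial counts also add (VectorUDAFCountMerge) rather than being re-counted, which is why FINAL mode no longer needs struct buffers at all. Stated as code (illustrative names, not Hive APIs):

    // Merge semantics for the decomposed aggregates: both kinds of partials
    // combine by simple addition.
    public class MergePartials {
        static double mergeSum(double a, double b) { return a + b; }
        static long mergeCount(long a, long b) { return a + b; }
        public static void main(String[] args) {
            System.out.println(mergeSum(2.5, 3.5));  // 6.0
            System.out.println(mergeCount(3, 4));    // 7
        }
    }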
VectorUDAFSumDouble(col 13:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 - 10.175D) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175D)) (type: double), (- _col1) (type: double), (_col0 % 79.553D) (type: double), (- (_col0 * (_col0 - 10.175D))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175D))) / (_col0 - 10.175D)) (type: double), (- (_col0 - 10.175D)) (type: double), _col4 (type: double), (-3728.0D - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double) + expressions: power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5)) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) % 79.553D) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5) (type: double), (- power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), _col9 (type: double), ((- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) / (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (_col10 / _col11) (type: double), (-3728.0D - power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), power(((_col12 - ((_col13 * _col13) / _col11)) / _col11), 0.5) (type: double), ((_col10 / _col11) / power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, 
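The reducer's Select then rebuilds the original aggregates from the merged partials; the power(..., 0.5) expressions in this plan follow the one-pass identities, with the CASE WHEN n = 1 guard producing NULL for the sample flavor. A sketch that mirrors those expressions (stddevPop/stddevSamp are illustrative names, not Hive APIs):

    // Final reconstruction from (sum of squares, sum, count), written to
    // follow the plan expressions verbatim.
    public class StddevFinish {
        static Double stddevPop(double sumSq, double sum, long n) {
            if (n == 0) return null;                       // empty group
            return Math.pow((sumSq - (sum * sum) / n) / n, 0.5);
        }
        static Double stddevSamp(double sumSq, double sum, long n) {
            if (n <= 1) return null;  // the plan's CASE WHEN (n = 1L) THEN null
            return Math.pow((sumSq - (sum * sum) / n) / (n - 1), 0.5);
        }
        public static void main(String[] args) {
            // values 1, 2, 3: sum = 6, sumSq = 14, n = 3
            System.out.println(stddevPop(14, 6, 3));   // 0.816... = sqrt(2/3)
            System.out.println(stddevSamp(14, 6, 3));  // 1.0
        }
    }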
_col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [14, 19, 15, 23, 26, 29, 22, 32, 40, 9, 43, 35, 46, 54, 53, 59] + selectExpressions: FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColSubtractDoubleScalar(col 15:double, val 10.175)(children: FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 21:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 15:double) -> 19:double) -> 15:double, IfExprNullCondExpr(col 18:boolean, null, col 20:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 20:bigint) -> 21:bigint) -> 19:double) -> 15:double) -> 19:double, FuncPowerDoubleToDouble(col 22:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 22:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 15:double) -> 22:double) -> 15:double) -> 22:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 22:double, col 26:double)(children: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 23:double) -> 22:double, IfExprNullCondExpr(col 21:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 22:double, DoubleColSubtractDoubleScalar(col 23:double, val 10.175)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 23:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 26:double)(children: DoubleColDivideLongColumn(col 23:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 23:double) -> 26:double) -> 23:double, IfExprNullCondExpr(col 25:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 23:double) -> 26:double) -> 23:double, DoubleColUnaryMinus(col 22:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 5:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 22:double) -> 26:double) -> 22:double) -> 26:double) -> 22:double) -> 26:double, DoubleColModuloDoubleScalar(col 22:double, val 79.553)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 22:double, col 31:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 29:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 29:double) -> 22:double, IfExprNullCondExpr(col 28:boolean, null, col 30:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 30:bigint) -> 31:bigint) -> 29:double) -> 22:double) -> 29:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColMultiplyDoubleColumn(col 22:double, col 35:double)(children: FuncPowerDoubleToDouble(col 32:double)(children: DoubleColDivideLongColumn(col 22:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 32:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 32:double) -> 22:double, IfExprNullCondExpr(col 31:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 31:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 32:double) -> 22:double, DoubleColSubtractDoubleScalar(col 32:double, val 10.175)(children: FuncPowerDoubleToDouble(col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 32:double) -> 35:double) -> 32:double, IfExprNullCondExpr(col 34:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 34:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double) -> 32:double) -> 35:double) -> 32:double) -> 22:double, FuncPowerDoubleToDouble(col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 39:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 32:double) -> 35:double) -> 32:double, IfExprNullCondExpr(col 37:boolean, null, col 38:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 38:bigint) -> 39:bigint) -> 35:double) -> 32:double, DoubleColUnaryMinus(col 35:double)(children: FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 35:double, col 42:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 40:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 40:double) -> 35:double, IfExprNullCondExpr(col 39:boolean, null, col 41:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 39:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 41:bigint) -> 42:bigint) -> 40:double) -> 35:double) -> 40:double, DoubleColDivideDoubleColumn(col 35:double, col 46:double)(children: DoubleColUnaryMinus(col 43:double)(children: DoubleColMultiplyDoubleColumn(col 35:double, col 
46:double)(children: FuncPowerDoubleToDouble(col 43:double)(children: DoubleColDivideLongColumn(col 35:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 43:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 43:double) -> 35:double, IfExprNullCondExpr(col 42:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 42:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 43:double) -> 35:double, DoubleColSubtractDoubleScalar(col 43:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 43:double) -> 46:double) -> 43:double, IfExprNullCondExpr(col 45:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 45:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 46:double) -> 43:double) -> 46:double) -> 43:double) -> 35:double, DoubleColSubtractDoubleScalar(col 43:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 50:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 43:double) -> 46:double) -> 43:double, IfExprNullCondExpr(col 48:boolean, null, col 49:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 48:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 49:bigint) -> 50:bigint) -> 46:double) -> 43:double) -> 46:double) -> 43:double, DoubleColUnaryMinus(col 46:double)(children: DoubleColSubtractDoubleScalar(col 35:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 35:double, col 52:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 46:double) -> 35:double, IfExprNullCondExpr(col 50:boolean, null, col 51:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 50:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 51:bigint) -> 52:bigint) -> 46:double) -> 35:double) -> 46:double) -> 35:double, LongColDivideLongColumn(col 10:bigint, col 11:bigint) -> 46:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 53:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 53:double, col 56:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 54:double)(children: DoubleColDivideLongColumn(col 53:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 53:double) -> 54:double) -> 53:double, IfExprNullCondExpr(col 52:boolean, null, col 55:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 52:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 55:bigint) -> 56:bigint) -> 54:double) -> 53:double) -> 54:double, FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 
12:double, col 57:double)(children: DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 53:double) -> 57:double) -> 53:double) -> 57:double) -> 53:double, DoubleColDivideDoubleColumn(col 57:double, col 58:double)(children: LongColDivideLongColumn(col 10:bigint, col 11:bigint) -> 57:double, FuncPowerDoubleToDouble(col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 61:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 58:double) -> 59:double) -> 58:double, IfExprNullCondExpr(col 56:boolean, null, col 60:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 56:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 60:bigint) -> 61:bigint) -> 59:double) -> 58:double) -> 59:double + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -208,4 +233,4 @@ WHERE (((cint <= cfloat) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### -0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.34690095515641 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL +0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.3469009551564 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out index 4a7b0e08f8..c3b539238c 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out @@ -75,17 +75,18 @@ STAGE PLANS: predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean) Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double) - outputColumnNames: ctinyint, cint, cdouble + expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 5] + projectedOutputColumnNums: [2, 5, 0, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint) + aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, 
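The updated golden results in these files differ only in the low-order digits (for example 34.34690095515641 versus 34.3469009551564 above): the decomposed formula associates the double arithmetic differently from the old streaming variance evaluator, and double addition is not associative, so small drift in the expected outputs is the anticipated consequence of the rewrite rather than a correctness regression. A two-line demonstration of that non-associativity:

    // Reassociating double sums can legitimately change low-order digits.
    public class ReassociationDemo {
        public static void main(String[] args) {
            double a = 1e18, b = -1e18, c = 1.0;
            System.out.println((a + b) + c);  // 1.0
            System.out.println(a + (b + c));  // 0.0 -- c is absorbed into b
        }
    }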
VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false @@ -93,7 +94,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -102,8 +103,8 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumnNums: [0, 1, 2, 3, 4] - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -119,26 +120,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 5] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 5 + dataColumns: VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:bigint, VALUE._col4:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4) + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), min(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 4:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 252 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2)) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2))) (type: double) + expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (- power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5)) (type: double), (_col2 / _col3) (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3))) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 6, 7, 9, 8, 11, 15, 14, 13, 18, 4, 4, 19] + selectExpressions: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 0:bigint) -> 6:bigint, FuncPowerDoubleToDouble(col 8:double)(children: DoubleColDivideLongColumn(col 7:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 8:double)(children: DoubleColDivideLongColumn(col 7:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 7:double) -> 8:double) -> 7:double) -> 8:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: FuncPowerDoubleToDouble(col 9:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 8:double) -> 9:double) -> 8:double) -> 9:double) -> 8:double) -> 9:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 8:double, LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 11:bigint, DoubleColDivideDoubleColumn(col 13:double, col 14:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 13:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 14:double) -> 15:double, DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 13:double) -> 
14:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideDoubleColumn(col 13:double, col 16:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 13:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 16:double) -> 17:double) -> 13:double, LongColSubtractLongColumn(col 10:bigint, col 12:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) -> 10:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 12:bigint) -> 18:bigint, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 4:tinyint) -> 16:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 17:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 19:double) -> 20:double) -> 17:double) -> 19:double + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -198,4 +223,4 @@ WHERE (((csmallint >= cint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --493101012745 277615870175435 -493101016473 136727.7868296355 -136727.7868296355 2298.5515807767374 0 0.0 1.8694487691330246E10 -0.0 -278108971191908 -64 -64 0.0 +-493101012745 277615870175435 -493101016473 136727.78682963562 -136727.78682963562 2298.5515807767374 0 0.0 1.8694487691330276E10 -0.0 -278108971191908 -64 -64 0.0 diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out index a35c9c586f..303702cef0 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out @@ -69,39 +69,40 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By 
Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumnNums: [3, 4, 5, 6] Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -117,26 +118,51 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: 
double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 6:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:double, col 2:timestamp + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] + selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 
3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 24576 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out index ce188a058a..6fd173a958 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out @@ -257,18 +257,18 @@ STAGE PLANS: selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgDouble(col 13:double) -> struct + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:tinyint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) @@ -280,10 +280,10 @@ STAGE PLANS: 
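parquet_vectorization_limit exercises the grouped variant of the same rewrite: avg(cdouble + 1.0) per ctinyint key becomes a per-key (sum, count) pair, and, as the hunks that follow show, a new Select Operator computing (_col1 / _col2) lands above the Limit to finalize it. A self-contained sketch of that shape, with hypothetical names:

    import java.util.HashMap;
    import java.util.Map;

    // Illustrative grouped average: one (sum, count) pair per key during
    // aggregation, finalized by a projection step before any Limit.
    public class GroupedAvg {
        private final Map<Byte, double[]> acc = new HashMap<>();
        void add(byte key, double v) {
            double[] a = acc.computeIfAbsent(key, k -> new double[2]);
            a[0] += v;   // running sum
            a[1] += 1;   // running count
        }
        Map<Byte, Double> finish() {
            Map<Byte, Double> out = new HashMap<>();
            acc.forEach((k, a) -> out.put(k, a[0] / a[1]));
            return out;
        }
        public static void main(String[] args) {
            GroupedAvg g = new GroupedAvg();
            g.add((byte) 1, 2.0); g.add((byte) 1, 4.0); g.add((byte) 2, 5.0);
            System.out.println(g.finish());  // {1=3.0, 2=5.0}
        }
    }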
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0] - valueColumnNums: [1] + valueColumnNums: [1, 2] Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -311,41 +311,50 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:tinyint, VALUE._col0:struct + dataColumnCount: 3 + dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 1:struct) -> double + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:tinyint native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator + Select Operator + expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + projectedOutputColumnNums: [0, 3] + selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double + Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 240 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out index e581007c80..e8fa9dd9b7 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_not.q.out @@ 
-55,4 +55,4 @@ WHERE (((cstring2 LIKE '%b%') POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesparquet #### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593508E9 3.875716535945533E8 0.0 2.06347151720190515E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.051665108770714E9 -2.06347151720190515E18 1.5020929380914048E17 -64 64 diff --git a/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out index a95898f8ef..212a83e8f9 100644 --- a/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out @@ -32,14 +32,14 @@ STAGE PLANS: outputColumnNames: cbigint Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint) + aggregations: sum(cbigint), count(cbigint) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -60,17 +60,21 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out index 1e5d4569cf..8e4828cb7b 100644 --- a/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out +++ 
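[Editorial note] The hunks above show the pattern that repeats through the rest of this patch: avg() no longer travels between map and reduce as a struct<count,sum,input> value. The partial side emits sum() and count() as ordinary columns, the merge side combines them with sum and count-merge aggregators, and a new Select Operator performs the final division, which also lets the reduce side stay fully vectorized. The small drift in the parquet_vectorization_not expected results (for example 1.436387455459401E9 becoming 1.4363874554593508E9) is plausibly because variance is now computed from raw sums in a different order than the old streaming evaluator, so low-order floating-point digits round differently. A minimal standalone sketch of the two-phase average (plain Java with illustrative names, not Hive operators):

import java.util.Arrays;

/** Minimal sketch of the avg(x) -> sum(x)/count(x) decomposition
 *  that the rewritten plans implement. Names are illustrative,
 *  not Hive APIs. */
public class AvgDecomposition {
  public static void main(String[] args) {
    double[] values = {1.0, 2.0, 4.0};

    // Map/partial side: each task emits a (sum, count) pair instead of
    // an avg() struct, matching "aggregations: sum(_col0), count(_col0)".
    double partialSum = Arrays.stream(values).sum();   // VALUE._col0
    long   partialCount = values.length;               // VALUE._col1

    // Reduce/final side: merge partials with sum() and count(), then a
    // Select Operator computes (_col0 / _col1) as the final average.
    double avg = partialSum / partialCount;
    System.out.println(avg);  // 2.3333...
  }
}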
b/ql/src/test/results/clientpositive/spark/spark_explainuser_1.q.out @@ -2127,42 +2127,42 @@ Stage-0 limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_21] - Join Operator [JOIN_19] (rows=6 width=227) + File Output Operator [FS_22] + Join Operator [JOIN_20] (rows=6 width=227) Output:["_col0","_col1","_col2"],condition map:[{"":"{\"type\":\"Left Semi\",\"left\":0,\"right\":1}"}],keys:{"0":"_col1","1":"_col0"} <-Reducer 2 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_17] + PARTITION-LEVEL SORT [RS_18] PartitionCols:_col1 Select Operator [SEL_6] (rows=13 width=227) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=227) - Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1 + Group By Operator [GBY_5] (rows=13 width=235) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1 <-Map 1 [GROUP] GROUP [RS_4] PartitionCols:_col0, _col1 - Group By Operator [GBY_3] (rows=13 width=295) - Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr - Filter Operator [FIL_22] (rows=26 width=223) + Group By Operator [GBY_3] (rows=13 width=235) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(p_size)","count(p_size)"],keys:p_name, p_mfgr + Filter Operator [FIL_23] (rows=26 width=223) predicate:p_name is not null TableScan [TS_0] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"] <-Reducer 5 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_18] + PARTITION-LEVEL SORT [RS_19] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=13 width=184) + Group By Operator [GBY_17] (rows=13 width=184) Output:["_col0"],keys:_col0 - Select Operator [SEL_11] (rows=26 width=184) + Select Operator [SEL_12] (rows=26 width=184) Output:["_col0"] - Filter Operator [FIL_23] (rows=26 width=491) + Filter Operator [FIL_24] (rows=26 width=491) predicate:first_value_window_0 is not null - PTF Operator [PTF_10] (rows=26 width=491) + PTF Operator [PTF_11] (rows=26 width=491) Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}] - Select Operator [SEL_9] (rows=26 width=491) + Select Operator [SEL_10] (rows=26 width=491) Output:["_col1","_col2","_col5"] <-Map 4 [PARTITION-LEVEL SORT] - PARTITION-LEVEL SORT [RS_8] + PARTITION-LEVEL SORT [RS_9] PartitionCols:p_mfgr - TableScan [TS_7] (rows=26 width=223) + TableScan [TS_8] (rows=26 width=223) default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"] PREHOOK: query: explain select * @@ -2385,23 +2385,25 @@ Stage-0 PARTITION-LEVEL SORT [RS_22] Group By Operator [GBY_12] (rows=1 width=16) Output:["_col0","_col1"],aggregations:["count()","count(_col0)"] - Group By Operator [GBY_7] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] - <-Map 5 [GROUP] - GROUP [RS_6] - Group By Operator [GBY_5] (rows=1 width=76) - Output:["_col0"],aggregations:["avg(p_size)"] - Filter Operator [FIL_32] (rows=8 width=4) - predicate:(p_size < 10) - TableScan [TS_2] (rows=26 width=4) - default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] + Select Operator [SEL_8] (rows=1 width=16) + Output:["_col0"] + Group By Operator [GBY_7] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] + <-Map 5 [GROUP] + GROUP [RS_6] + Group By Operator [GBY_5] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"] + Filter 
Operator [FIL_32] (rows=8 width=4) + predicate:(p_size < 10) + TableScan [TS_2] (rows=26 width=4) + default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_size"] <-Reducer 8 [PARTITION-LEVEL SORT] PARTITION-LEVEL SORT [RS_25] PartitionCols:_col0 Select Operator [SEL_20] (rows=1 width=12) Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=8) - Output:["_col0"],aggregations:["avg(VALUE._col0)"] + Group By Operator [GBY_19] (rows=1 width=16) + Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"] <- Please refer to the previous Map 5 [GROUP] PREHOOK: query: explain select b.p_mfgr, min(p_retailprice) diff --git a/ql/src/test/results/clientpositive/spark/subquery_in.q.out b/ql/src/test/results/clientpositive/spark/subquery_in.q.out index 7df930039a..170160ed7e 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_in.q.out @@ -341,29 +341,33 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 is not null and _col1 is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -1212,17 +1216,17 @@ STAGE PLANS: predicate: l_partkey is not null (type: boolean) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(l_quantity) + aggregations: sum(l_quantity), count(l_quantity) keys: l_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: 
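[Editorial note] The spark_explainuser_1 hunks show the grouped form of the same rewrite: GBY_3/GBY_5 now carry sum(p_size) and count(p_size) per (p_name, p_mfgr) key instead of an avg() struct, and SEL_6 divides them; the operator-ID shifts (FS_21 to FS_22 and so on) are just renumbering caused by the extra Select. A hypothetical standalone sketch of the grouped accumulation, not Hive code:

import java.util.HashMap;
import java.util.Map;

/** Sketch of grouped avg after the rewrite: per-key (sum, count)
 *  partials, then a final division per group. Illustrative only. */
public class GroupedAvg {
  public static void main(String[] args) {
    Object[][] rows = { {"a", 10}, {"a", 20}, {"b", 5} };
    Map<String, double[]> acc = new HashMap<>(); // key -> {sum, count}
    for (Object[] r : rows) {
      double[] sc = acc.computeIfAbsent((String) r[0], k -> new double[2]);
      sc[0] += (Integer) r[1];  // partial sum, as in GBY_3
      sc[1] += 1;               // partial count
    }
    // final Select: (_col1 / _col2) per group, as in SEL_6
    acc.forEach((k, sc) -> System.out.println(k + " " + (sc[0] / sc[1])));
  }
}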
+ Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Reducer 2 Reduce Operator Tree: Join Operator @@ -1275,16 +1279,16 @@ STAGE PLANS: Reducer 7 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col1 is not null (type: boolean) + predicate: (_col1 is not null and _col2 is not null) (type: boolean) Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: double), _col0 (type: int) + expressions: (_col1 / _col2) (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 50 Data size: 5999 Basic stats: COMPLETE Column stats: NONE Group By Operator @@ -4232,17 +4236,17 @@ STAGE PLANS: predicate: p_partkey is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 2 Reduce Operator Tree: Join Operator @@ -4263,24 +4267,28 @@ STAGE PLANS: Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col1 is not null (type: boolean) + predicate: (_col1 is not null and _col2 is not null) (type: boolean) Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int), _col1 (type: double) - mode: hash + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: double) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: double) + Group By Operator + keys: _col0 (type: int), _col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: double) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: double) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -4787,27 +4795,27 @@ STAGE PLANS: predicate: p_partkey is not null 
(type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) keys: p_partkey (type: int) mode: hash - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 10 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: double), _col0 (type: int) + expressions: (_col1 / _col2) (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -4908,23 +4916,27 @@ STAGE PLANS: Reducer 8 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col1) - keys: _col0 (type: int) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col1) + keys: _col0 (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6 Data size: 726 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out index 32e0983bf4..4d9ec011f3 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multi.q.out @@ -2842,14 +2842,14 @@ STAGE PLANS: outputColumnNames: l_quantity Statistics: Num rows: 100 Data size: 11999 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(l_quantity) + aggregations: sum(l_quantity), count(l_quantity) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num 
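[Editorial note] In the subquery_in hunks, the filter that used to test the single avg output (`_col0 is not null`) now tests both halves, for example `(_col1 is not null and _col2 is not null)`: after the rewrite the planner sees two independent aggregate columns rather than one, and conservatively null-checks each. The observable avg() semantics are preserved because an empty or all-null group yields a NULL sum, so the quotient is NULL exactly when avg() would have been. A small illustration of that invariant (assuming standard SQL avg() semantics; not Hive code):

/** Sketch of why the rewritten plans test both sum and count for null:
 *  avg(x) over an empty or all-null group is NULL, which after the
 *  rewrite surfaces through a NULL sum. Standalone illustration. */
public class AvgNullSemantics {
  // Returns null when nothing was aggregated, mirroring SQL avg().
  static Double avg(Double sum, Long count) {
    if (sum == null || count == null || count == 0L) {
      return null;
    }
    return sum / count;
  }

  public static void main(String[] args) {
    System.out.println(avg(10.0, 4L));   // 2.5
    System.out.println(avg(null, 0L));   // null -> removed by the filter
  }
}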
rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint) Map 5 Map Operator Tree: TableScan @@ -2887,35 +2887,39 @@ STAGE PLANS: Reducer 10 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col0) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 12 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), true (type: boolean) + expressions: (_col0 / _col1) (type: double), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Reducer 2 Reduce Operator Tree: @@ -2975,12 +2979,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col2, _col4, _col5 - Statistics: Num rows: 25 Data size: 5224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 3424 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: double) sort order: + Map-reduce partition columns: _col2 (type: double) - Statistics: Num rows: 25 Data size: 5224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 3424 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col4 (type: bigint), _col5 (type: bigint) Reducer 8 Reduce Operator Tree: @@ -2991,24 +2995,24 @@ STAGE PLANS: 0 _col2 (type: double) 1 _col0 (type: 
double) outputColumnNames: _col0, _col2, _col4, _col5, _col7 - Statistics: Num rows: 27 Data size: 5746 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 27 Data size: 3766 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col4 = 0L) or (_col7 is null and _col2 is not null and (_col5 >= _col4))) (type: boolean) - Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2371 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), 1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2371 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2371 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 17 Data size: 3617 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 2371 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out index 82a1304a93..8d141064be 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_notin.q.out @@ -627,12 +627,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 5357 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: UDFToDouble(_col1) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col1) (type: double) - Statistics: Num rows: 26 Data size: 5357 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Reduce Operator Tree: @@ -643,17 +643,17 @@ STAGE PLANS: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col5 - Statistics: Num rows: 28 Data size: 5892 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 28 Data size: 3947 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col2 = 0L) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 18 Data size: 3787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 18 Data size: 3787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 18 Data size: 3787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2537 Basic stats: COMPLETE Column stats: NONE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -693,30 +693,34 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(_col0) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(), count(_col0) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 8 Reduce Operator Tree: Select Operator @@ -752,30 +756,30 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 9 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), true (type: boolean) + expressions: (_col0 / _col1) (type: double), true (type: boolean) outputColumnNames: _col0, _col1 - 
Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/spark/subquery_select.q.out b/ql/src/test/results/clientpositive/spark/subquery_select.q.out index 6d839facd6..7d23b780fc 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_select.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_select.q.out @@ -3688,14 +3688,14 @@ STAGE PLANS: predicate: p_partkey BETWEEN 1 AND 20 (type: boolean) Statistics: Num rows: 2 Data size: 242 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_partkey) + aggregations: sum(p_partkey), count(p_partkey) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Map 9 Map Operator Tree: TableScan @@ -3751,10 +3751,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col2 - Statistics: Num rows: 26 Data size: 2340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 780 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 2340 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 780 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: double) Reducer 4 Reduce Operator Tree: @@ -3765,14 +3765,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 910 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: CASE WHEN ((_col1 > 409437L)) THEN (_col2) ELSE (_col3) END (type: double) outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 910 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 2470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 910 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3791,14 +3791,18 @@ STAGE PLANS: Reducer 8 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + 
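[Editorial note] The recurring "Data size: 76" or "80" becoming "16" in these hunks reflects the shuffled payload shrinking from an avg() struct<count,sum,input> to two fixed-width 8-byte columns. A back-of-envelope check (the exact old constants come from Hive's statistics annotation, not from this arithmetic):

/** Illustrative size accounting for the rewritten partials. */
public class PartialSizeEstimate {
  public static void main(String[] args) {
    long sumBytes = Long.BYTES;    // bigint or double partial sum: 8
    long countBytes = Long.BYTES;  // bigint partial count: 8
    System.out.println(sumBytes + countBytes); // 16, as in the new plans
  }
}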
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -4036,14 +4040,14 @@ STAGE PLANS: outputColumnNames: p_size Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Map 7 Map Operator Tree: TableScan @@ -4094,30 +4098,34 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 5149 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 3589 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (UDFToDouble(_col1) > _col2) (type: boolean) - Statistics: Num rows: 8 Data size: 1584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 8 Data size: 1584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 8 Data size: 1584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reducer 8 Reduce Operator Tree: Group By Operator @@ -4220,14 +4228,14 @@ STAGE PLANS: outputColumnNames: p_size Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(p_size) + aggregations: sum(p_size), count(p_size) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE 
Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Map 12 Map Operator Tree: TableScan @@ -4285,14 +4293,18 @@ STAGE PLANS: Reducer 11 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) Reducer 13 Reduce Operator Tree: Group By Operator @@ -4341,10 +4353,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 2366 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 806 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 26 Data size: 2366 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 806 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 5 Reduce Operator Tree: @@ -4355,14 +4367,14 @@ STAGE PLANS: 0 1 outputColumnNames: _col1, _col2, _col3, _col4 - Statistics: Num rows: 26 Data size: 2600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 1040 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 2600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 1040 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 2600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 1040 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index 0a67096ee2..1125eb3146 100644 --- a/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -276,28 +276,32 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data 
size: 80 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Group By Operator @@ -306,14 +310,14 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out index d81781e881..83d5a6211f 100644 --- a/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_cast_constant.q.out @@ -141,18 +141,18 @@ STAGE PLANS: projectedOutputColumnNums: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(50), avg(50.0D), avg(50) + aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50) Group By Vectorization: - aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 12:int) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 14:decimal(10,0)) -> struct + aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, 
VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 2:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -164,7 +164,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -185,29 +185,38 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 1:struct) -> double, VectorUDAFAvgFinal(col 2:struct) -> double, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,4) + aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(12,0)) -> decimal(12,0), VectorUDAFCountMerge(col 6:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:int native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0, 1, 2] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: double), (_col3 / _col4) (type: double), CAST( (_col5 / _col6) AS decimal(6,4)) (type: decimal(6,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + projectedOutputColumnNums: [0, 7, 8, 11] + selectExpressions: 
LongColDivideLongColumn(col 1:bigint, col 2:bigint) -> 7:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 8:double, CastDecimalToDecimal(col 10:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 5:decimal(12,0), col 9:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 9:decimal(19,0)) -> 10:decimal(32,20)) -> 11:decimal(6,4) Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.1 + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(6,4)) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -218,7 +227,7 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(6,4)) outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator diff --git a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 98c709c872..d37a27e07d 100644 --- a/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -249,26 +249,27 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] Select Operator - expressions: cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), cint (type: int) - outputColumnNames: cdecimal1, cdecimal2, cint + expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 2, 3] + projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10] + selectExpressions: CastDecimalToDouble(col 1:decimal(20,10)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(20,10)) -> 6:double, CastDecimalToDouble(col 1:decimal(20,10)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(23,14)) -> 7:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 9:double) -> 10:double Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: 
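[Editorial note] The vector_cast_constant hunks show the three final-division flavors: a long sum over a count and a double sum over a count both come out as double, while the decimal average divides as decimal and is cast back to the declared result type (decimal(6,4) here), each with a dedicated vectorized expression. A standalone sketch of the arithmetic (the MathContext below is a stand-in, not Hive's precision/scale rule):

import java.math.BigDecimal;
import java.math.MathContext;
import java.math.RoundingMode;

/** Sketch of the final divisions in the rewritten plan. */
public class AvgFinalDivision {
  public static void main(String[] args) {
    long sumLong = 50L * 1049, count = 1049;
    // (_col1 / _col2): long sum over count, evaluated as double
    double avgLong = (double) sumLong / count;           // 50.0

    double sumDouble = 50.0 * 1049;
    // (_col3 / _col4): double sum over count
    double avgDouble = sumDouble / count;                // 50.0

    BigDecimal sumDec = BigDecimal.valueOf(50L * 1049);
    // CAST((_col5 / _col6) AS decimal(6,4)): divide, then rescale
    BigDecimal avgDec = sumDec
        .divide(BigDecimal.valueOf(count), new MathContext(38))
        .setScale(4, RoundingMode.HALF_UP);              // 50.0000
    System.out.println(avgLong + " " + avgDouble + " " + avgDec);
  }
}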
NONE Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFAvgDecimal(col 1:decimal(20,10)) -> struct, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFAvgDecimal(col 2:decimal(23,14)) -> struct, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 3:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - keys: cint (type: int) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -279,9 +280,9 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), 
_col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -297,29 +298,58 @@ STAGE PLANS: includeColumns: [1, 2, 3] dataColumns: cdouble:double, cdecimal1:decimal(20,10), cdecimal2:decimal(23,14), cint:int partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(20,10), VALUE._col2:decimal(20,10), VALUE._col3:decimal(30,10), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(23,14), VALUE._col8:decimal(23,14), VALUE._col9:decimal(33,14), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 3:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 4:decimal(30,10)) -> decimal(30,10), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 9:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 10:decimal(33,14)) -> decimal(33,14), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: KEY._col0 (type: int) mode: 
mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col15 > 1L) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 13:bigint, val 1) + predicate: (_col13 > 1L) (type: boolean) Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), (CAST( _col4 AS decimal(24,14)) / _col1) (type: decimal(38,28)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), (CAST( _col10 AS decimal(27,18)) / _col7) (type: decimal(38,29)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 16, 17, 18, 7, 8, 9, 10, 24, 19, 25] + selectExpressions: DecimalColDivideDecimalColumn(col 14:decimal(24,14), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 4:decimal(30,10)) -> 14:decimal(24,14), CastLongToDecimal(col 1:bigint) -> 15:decimal(19,0)) -> 16:decimal(38,28), FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 19:double) -> 18:double, IfExprNullCondExpr(col 20:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 19:double) -> 18:double, DecimalColDivideDecimalColumn(col 23:decimal(27,18), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 10:decimal(33,14)) -> 23:decimal(27,18), 
CastLongToDecimal(col 7:bigint) -> 15:decimal(19,0)) -> 24:decimal(38,29), FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 19:double) -> 25:double) -> 19:double) -> 25:double) -> 19:double, FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 25:double) -> 26:double) -> 25:double, IfExprNullCondExpr(col 22:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 25:double Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -350,14 +380,14 @@ POSTHOOK: query: SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby #### A masked pattern was here #### --3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.811207307641183333 2604201.2704476737 2852759.5602156054 --563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.63641486490000 1426.0153418918999 2016.6902366556308 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.503273076922950000 1707.9424961538462 2415.395441814127 -253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274 -528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984 -626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603 -6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653 -762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.74432689170000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.078394999846250000 3491310.1327026924 4937458.140118758 -NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565159 
6822.606289190924 +-3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.6735247296333333333333333333 2174330.209240386 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.81120730764118333333333333333 2604201.2704476737 2852759.5602156054 +-563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.6364148649000000000000000000 1426.0153418918997 2016.6902366556305 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.50327307692295000000000000000 1707.9424961538462 2415.395441814127 +253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.3399236697630859375000000000 5708.956347886203 5711.745967572781 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.42835967548079188476562500000 6837.632716002931 6840.973851172272 +528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.4109968243230468750000000000 257528.9298820665 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.96562480769168948242187500000 308443.1074570797 308593.82484083937 +626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.2939966110631835937500000000 5742.091453237337 5744.897264034264 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.32914804687497798828125000000 6877.318722794881 6880.679250101608 +6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.6757008106666666666666666667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.69651461528206666666666666667 3292794.4113115156 4032833.0678006653 +762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917000000000000000000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.07839499984625000000000000000 3491310.1327026924 4937458.140118757 +NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.6081081080666666666666666667 5695.483082135323 5696.410307714464 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.57692307692296666666666666667 6821.495748565151 6822.606289190915 PREHOOK: query: CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(11,5)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(16,0)) AS cdecimal2, @@ -628,26 +658,27 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct] Select Operator - expressions: cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), cint (type: int) - outputColumnNames: cdecimal1, cdecimal2, cint + expressions: cint (type: int), cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 2, 3] + projectedOutputColumnNums: [3, 1, 2, 6, 9, 7, 12] + selectExpressions: CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 
8:double)(children: CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 7:double, CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 8:double) -> 9:double, CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 7:double, DoubleColMultiplyDoubleColumn(col 8:double, col 11:double)(children: CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 8:double, CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 11:double) -> 12:double Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1:decimal(11,5)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> decimal(21,5), VectorUDAFAvgDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> struct, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 6:decimal(11,5)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(16,0)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> decimal(26,0), VectorUDAFAvgDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> struct, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 7:decimal(16,0)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 8:decimal(16,0)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1:decimal(11,5)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> decimal(21,5), VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCount(col 2:decimal(16,0)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> decimal(26,0), VectorUDAFSumDouble(col 12:double) -> double, 
VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 3:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - keys: cint (type: int) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -658,9 +689,9 @@ STAGE PLANS: keyColumnNums: [0] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -676,29 +707,58 @@ STAGE PLANS: includeColumns: [1, 2, 3] dataColumns: cdouble:double, cdecimal1:decimal(11,5)/DECIMAL_64, cdecimal2:decimal(16,0)/DECIMAL_64, cint:int partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(11,5), decimal(11,5), decimal(16,0), decimal(16,0)] + scratchColumnTypeNames: [decimal(11,5), double, double, double, double, decimal(16,0), double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0), VALUE._col8:decimal(16,0), VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By 
Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col15 > 1L) (type: boolean) + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 13:bigint, val 1) + predicate: (_col13 > 1L) (type: boolean) Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double) + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), (CAST( _col4 AS decimal(15,9)) / _col1) (type: decimal(35,29)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), CAST( (CAST( _col10 AS decimal(20,4)) / _col7) AS decimal(20,4)) (type: decimal(20,4)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 16, 17, 18, 7, 8, 9, 10, 23, 19, 25] + selectExpressions: DecimalColDivideDecimalColumn(col 14:decimal(15,9), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 4:decimal(21,5)) -> 14:decimal(15,9), CastLongToDecimal(col 1:bigint) -> 15:decimal(19,0)) -> 16:decimal(35,29), FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 19:double) -> 18:double, IfExprNullCondExpr(col 20:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 19:double) -> 18:double, CastDecimalToDecimal(col 24:decimal(38,22))(children: DecimalColDivideDecimalColumn(col 23:decimal(20,4), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 10:decimal(26,0)) -> 23:decimal(20,4), CastLongToDecimal(col 7:bigint) -> 15:decimal(19,0)) -> 24:decimal(38,22)) -> 23:decimal(20,4), FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 19:double) -> 25:double) -> 19:double) -> 25:double) -> 19:double, FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 25:double) -> 26:double) -> 25:double, IfExprNullCondExpr(col 22:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 25:double Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -729,14 +789,14 @@ POSTHOOK: query: SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby_small #### A masked pattern was here #### --3728 5 -515.62107 -3367.65176 -13986.22811 -2797.245622000 1140.812276 1275.466899351126 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621 --563 2 -515.62107 -3367.65176 -3883.27283 -1941.636415000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596 -253665376 1024 9767.00541 -9779.54865 
-347484.08192 -339.339923750 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613 -528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.351935137 5555.7621107931345 5558.482190324908 1024 6984454 -11710 13948890 13621.9629 308443.09823296947 308593.8156122219 -626923679 1024 9723.40270 -9778.95135 10541.05247 10.293996553 5742.091453325366 5744.897264122336 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185 -6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 -762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 -NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 +-3728 5 -515.62107 -3367.65176 -13986.22811 -2797.24562200000000000000000000000 1140.8122759999992 1275.466899351125 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621 +-563 2 -515.62107 -3367.65176 -3883.27283 -1941.63641500000000000000000000000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596 +253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.33992375000000000000000000000 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613 +528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.35193513698630136986301369863 5555.762110793133 5558.482190324906 1024 6984454 -11710 13948890 13621.9629 308443.0982329696 308593.815612222 +626923679 1024 9723.40270 -9778.95135 10541.05247 10.29399655273437500000000000000 5742.091453325365 5744.897264122335 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185 +6981 2 -515.62107 -515.62107 -1031.24214 -515.62107000000000000000000000000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 +762 1 1531.21941 1531.21941 1531.21941 1531.21941000000000000000000000000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 +NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.60811000000000000000000000000 5695.4830839098695 5696.410309489299 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), @@ -755,4 +815,4 @@ FROM (SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby_small #### A masked pattern was here #### -91757235680 +96966670826 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out index ec73876402..c3201bf124 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -403,18 +403,18 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint) - outputColumnNames: ctinyint + expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - 
aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -426,22 +426,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -452,12 +458,12 @@ STAGE 
PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -497,7 +503,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +-4.344925324321378 1158.3003004768175 1158.3003004768175 1158.426587033782 34.03381113652741 34.03381113652741 34.03381113652741 34.03566639620535 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cbigint) as c1, MAX(cbigint), @@ -903,18 +909,18 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint) - outputColumnNames: cbigint + expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -926,22 +932,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY 
operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -952,12 +964,12 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 640 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -997,7 +1009,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 
1.441142951074188E9 1.4412215110214279E9 +-1.8515862077935246E8 2.07689300543070106E18 2.07689300543070106E18 2.07711944383076992E18 1.441142951074147E9 1.441142951074147E9 1.441142951074147E9 1.441221511021387E9 PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT MIN(cfloat) as c1, MAX(cfloat), @@ -1403,18 +1415,18 @@ STAGE PLANS: alias: alltypesorc Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cfloat (type: float) - outputColumnNames: cfloat + expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -1426,22 +1438,28 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: double) - sort order: + - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), ((_col2 - ((_col3 * 
_col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col3, _col4, _col7 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col3 (type: double), _col4 (type: double), _col7 (type: double) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -1452,12 +1470,12 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 636 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1497,7 +1515,7 @@ ORDER BY c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +-4.303895780321011 1163.8972588605056 1163.8972588605056 1164.0241556397098 34.11593848717203 34.11593848717203 34.11593848717203 34.11779822379677 WARNING: Comparing a bigint and a double may result in a loss of precision. 
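
The vectorization_0 rewrites above all follow one pattern: the FINAL-mode variance evaluators that previously forced "notVectorizedReason: GROUPBY operator ... not supported" are replaced by plain sum/count partial aggregates, and a new Select Operator reassembles the statistics as var_pop = (sum(x*x) - sum(x)*sum(x)/n) / n, var_samp with an (n - 1) denominator guarded by CASE WHEN n = 1 THEN NULL, and stddev_* = power(var_*, 0.5). A minimal plain-Java sketch of that arithmetic follows; the class name, method names, and toy data are illustrative only and are not part of this patch.

// Illustrative sketch (not part of the patch): reassembling variance and
// stddev from the three partials the rewritten plans accumulate per group:
// sum(x), count(x), and sum(x*x).
public final class VarianceDecompositionSketch {

  /** var_pop(x) = (sum(x*x) - sum(x)^2 / n) / n, as in the plan expression
   *  ((_col2 - ((_col3 * _col3) / _col1)) / _col1). */
  static double varPop(double sum, double sumSquares, long n) {
    return (sumSquares - (sum * sum) / n) / n;
  }

  /** var_samp(x) uses an (n - 1) denominator; the plans return NULL for
   *  n = 1 via CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END. */
  static Double varSamp(double sum, double sumSquares, long n) {
    return n == 1 ? null : (sumSquares - (sum * sum) / n) / (n - 1);
  }

  /** stddev_pop(x) = power(var_pop(x), 0.5), matching FuncPowerDoubleToDouble. */
  static double stddevPop(double sum, double sumSquares, long n) {
    return Math.pow(varPop(sum, sumSquares, n), 0.5);
  }

  public static void main(String[] args) {
    // Toy input; the map side of the rewritten plans produces exactly
    // these partials, and the reduce side merges them with plain sums.
    double[] xs = {1.0, 2.0, 3.0, 4.0};
    double sum = 0.0, sumSquares = 0.0;
    for (double x : xs) {
      sum += x;
      sumSquares += x * x;
    }
    long n = xs.length;
    System.out.println(varPop(sum, sumSquares, n));    // 1.25
    System.out.println(varSamp(sum, sumSquares, n));   // 1.666...
    System.out.println(stddevPop(sum, sumSquares, n)); // 1.1180...
  }
}

Because this decomposition evaluates the sums in a different floating-point order than the old one-pass GenericUDAFVarianceEvaluator/GenericUDAFStdEvaluator path, the results are algebraically equivalent but not bit-identical, which is why the expected outputs in these hunks change only in their trailing digits (e.g. 1158.3003004768184 -> 1158.3003004768175).
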
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT AVG(cbigint), @@ -1584,25 +1602,26 @@ STAGE PLANS: predicate: (((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (UDFToInteger(ctinyint) = 3569)) or (CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cbigint, cfloat + expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 3, 4] + projectedOutputColumnNums: [3, 4, 0, 14, 17] + selectExpressions: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 16:double) -> 17:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 3:bigint) -> struct, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_samp, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -1610,9 +1629,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: 
bigint), _col5 (type: double), _col6 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -1628,26 +1647,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(13,3), double] + scratchColumnTypeNames: [decimal(13,3), double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), min(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinLong(col 6:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (-6432.0D + _col0) (type: double), _col1 (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) + (-6432.0D + _col0)) (type: double), _col2 (type: double), (- (-6432.0D + _col0)) (type: double), (-6432.0D + (- (-6432.0D + _col0))) (type: double), (- (-6432.0D + _col0)) (type: double), ((- (-6432.0D + _col0)) / (- (-6432.0D + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432.0D + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + expressions: (_col0 / _col1) (type: double), (- (_col0 / _col1)) (type: double), (-6432.0D + (_col0 / _col1)) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) + (-6432.0D + (_col0 / _col1))) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) 
THEN (null) ELSE ((_col1 - 1)) END) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), (-6432.0D + (- (-6432.0D + (_col0 / _col1)))) (type: double), (- (-6432.0D + (_col0 / _col1))) (type: double), ((- (-6432.0D + (_col0 / _col1))) / (- (-6432.0D + (_col0 / _col1)))) (type: double), _col4 (type: bigint), _col5 (type: double), (((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) % power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5)) (type: double), (- ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), ((- (-6432.0D + (_col0 / _col1))) * (- (_col0 / _col1))) (type: double), _col6 (type: tinyint), (- _col6) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [7, 9, 10, 8, 11, 13, 14, 12, 19, 18, 22, 4, 5, 25, 20, 28, 6, 27] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 8:double) -> 9:double, DoubleScalarAddDoubleColumn(val -6432.0, col 8:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 8:double) -> 10:double, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 8:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 11:double)(children: DoubleColDivideLongColumn(col 8:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 8:double) -> 11:double) -> 8:double) -> 11:double) -> 8:double, DoubleColUnaryMinus(col 12:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 11:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double) -> 11:double, DoubleColAddDoubleColumn(col 12:double, col 14:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double) -> 12:double, DoubleScalarAddDoubleColumn(val -6432.0, col 13:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 13:double) -> 14:double) -> 13:double, DoubleColDivideLongColumn(col 12:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 12:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 12:double) -> 14:double) -> 12:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double, DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 12:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 18:double) -> 12:double, DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 19:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 
20:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 18:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 20:double) -> 18:double, DoubleColDivideDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 20:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 20:double) -> 21:double) -> 20:double, DoubleColUnaryMinus(col 22:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 21:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 22:double) -> 21:double) -> 22:double, DoubleColModuloDoubleColumn(col 21:double, col 20:double)(children: DoubleColDivideLongColumn(col 20:double, col 24:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double, IfExprNullCondExpr(col 17:boolean, null, col 23:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 23:bigint) -> 24:bigint) -> 21:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double, IfExprNullCondExpr(col 24:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 24:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 21:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 21:double, col 29:double)(children: DoubleColUnaryMinus(col 28:double)(children: DoubleScalarAddDoubleColumn(val -6432.0, col 21:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 21:double) -> 28:double) -> 21:double, DoubleColUnaryMinus(col 28:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 28:double) -> 29:double) -> 28:double, LongColUnaryMinus(col 6:tinyint) -> 27:tinyint + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1716,7 +1759,7 @@ WHERE (((cstring2 LIKE '%b%') POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 
1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593627E9 3.875716535945533E8 0.0 2.0634715172019392E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0516820315185745E9 -2.0634715172019392E18 1.5020929380914048E17 -64 64 PREHOOK: query: EXPLAIN extended select count(*) from alltypesorc where (((cstring1 LIKE 'a%') or ((cstring1 like 'b%') or (cstring1 like 'c%'))) or diff --git a/ql/src/test/results/clientpositive/spark/vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/vectorization_1.q.out index a5d4a14de9..71625e0a93 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -71,25 +71,26 @@ STAGE PLANS: predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, cint, cfloat, cdouble + expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5] + projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column 
stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -97,9 +98,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -115,26 +116,50 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:tinyint, VALUE._col5:int, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), max(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), count(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFMaxLong(col 4:tinyint) -> tinyint, VectorUDAFMaxLong(col 5:int) -> int, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFCountMerge(col 9:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: 
mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 / -26.28D) (type: double), _col1 (type: double), (-1.389D + _col1) (type: double), (_col1 * (-1.389D + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389D + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175D % (- (_col1 * (-1.389D + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) / -26.28D) (type: double), _col3 (type: double), (-1.389D + _col3) (type: double), (_col3 * (-1.389D + _col3)) (type: double), _col4 (type: tinyint), (- (_col3 * (-1.389D + _col3))) (type: double), _col5 (type: int), (CAST( _col5 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), ((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END) (type: double), (10.175D % (- (_col3 * (-1.389D + _col3)))) (type: double), _col9 (type: bigint), (-563 % _col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 10, 3, 12, 14, 4, 13, 5, 17, 18, 22, 9, 21] + selectExpressions: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 11:double) -> 10:double) -> 11:double, DoubleColDivideDoubleScalar(col 12:double, val -26.28)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 12:double)(children: DoubleColDivideLongColumn(col 10:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 10:double) -> 12:double) -> 10:double) -> 12:double) -> 10:double, DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 12:double, DoubleColMultiplyDoubleColumn(col 3:double, col 13:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 13:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 13:double) -> 15:double) -> 13:double, DecimalColMultiplyDecimalScalar(col 16:decimal(10,0), val 79.553)(children: CastLongToDecimal(col 5:int) -> 16:decimal(10,0)) -> 17:decimal(16,3), DoubleColDivideLongColumn(col 15:double, col 21:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 18:double)(children: DoubleColDivideLongColumn(col 15:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 15:double) -> 18:double) -> 15:double, IfExprNullCondExpr(col 19:boolean, null, col 20:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 
20:bigint) -> 21:bigint) -> 18:double, DoubleScalarModuloDoubleColumn(val 10.175, col 15:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 15:double)(children: DoubleScalarAddDoubleColumn(val -1.389, col 3:double) -> 15:double) -> 22:double) -> 15:double) -> 22:double, LongScalarModuloLongColumn(val -563, col 5:int) -> 21:int + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,4 +215,4 @@ WHERE (((cdouble > ctinyint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1074.830257547229 -40.89917266161449 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620903E10 10.175 3745 -563 +1074.8302575472321 -40.899172661614614 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620917E10 10.175 3745 -563 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 55e3ad6981..24cfa4e849 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -94,26 +94,27 @@ STAGE PLANS: predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: boolean) Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) - outputColumnNames: cbigint, cdouble, cstring1, cboolean1 + expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 5, 6, 10] + projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) + aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: - aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct, 
VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop + aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4] - keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) @@ -124,9 +125,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] + valueColumnNums: [4, 5, 6, 7, 8, 9, 10] Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) + value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -142,27 +143,55 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaaa + reduceColumnSortOrder: ++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 11 + dataColumns: KEY._col0:double, KEY._col1:bigint, KEY._col2:string, KEY._col3:boolean, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:bigint, VALUE._col6:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), 
avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumLong(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double, col 1:bigint, col 2:string, col 3:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double) + expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] + selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 
4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleScalar(col 15:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 15:double) -> 20:double) -> 15:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 20:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 22:double, DecimalScalarAddDecimalColumn(val -5638.15, col 23:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 23:decimal(19,0)) -> 24:decimal(22,2), DoubleColDivideDoubleColumn(col 21:double, col 25:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 21:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 25:double) -> 26:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 25:double) -> 21:double) -> 25:double, DoubleColAddDoubleColumn(col 27:double, col 28:double)(children: DoubleColDivideDoubleScalar(col 21:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 27:double, DoubleColUnaryMinus(col 21:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 21:double) -> 28:double) -> 21:double, FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [3, 11, 12, 4, 13, 14, 19, 15, 20, 22, 24, 9, 26, 25, 21, 27] Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out 
b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out index de501e7d10..c2a80062fe 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -96,26 +96,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > 11.0D) and (UDFToDouble(ctimestamp2) <> 12.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -126,9 +127,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13, 14] Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -144,27 +145,55 @@ STAGE PLANS: includeColumns: [0, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(11,4)] + scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 15 + dataColumns: KEY._col0:boolean, KEY._col1:tinyint, KEY._col2:timestamp, KEY._col3:float, KEY._col4:string, VALUE._col0:tinyint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double, VALUE._col4:bigint, VALUE._col5:double, VALUE._col6:double, VALUE._col7:bigint, VALUE._col8:float, VALUE._col9:tinyint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) 
-> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 
9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [] Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -419,26 +448,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > -1.388D) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: 
double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -449,7 +479,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), 
_col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -461,25 +491,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 5:tinyint) -> tinyint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFMaxDouble(col 13:float) -> float, VectorUDAFMinLong(col 14:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean, col 1:tinyint, col 2:timestamp, col 3:float, col 4:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / 
_col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 20, 19, 21, 22, 23, 24, 27, 28, 25, 13, 31, 14] + selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 19:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 19:double) -> 20:double, DoubleColUnaryMinus(col 6:double) -> 19:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 21:float, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 6:double) -> 23:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, DecimalColSubtractDecimalScalar(col 26:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 26:decimal(3,0)) -> 27:decimal(7,3), DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 28:double, DoubleScalarDivideDoubleColumn(val -26.28, col 29:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 6:double) -> 25:double) -> 29:double) -> 25:double, DoubleColDivideDoubleColumn(col 30:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 29:double)(children: CastLongToDouble(col 18:tinyint)(children: LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 18:tinyint) -> 29:double) -> 30:double, CastLongToDouble(col 1:tinyint) -> 29:double) -> 31:double Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), 
_col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out index b583ceeead..95bf29bb30 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -96,27 +96,27 @@ STAGE PLANS: predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 4, 6, 10, 5, 14] - selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double + projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] + selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 14:double) -> struct aggregation: stddev_samp, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFCount(col 4:float) -> bigint, 
VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 14:double) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) @@ -127,9 +127,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumnNums: [5, 6, 7, 8, 9, 10, 11] Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) + value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -145,27 +145,55 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaaaa + reduceColumnSortOrder: +++++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 12 + dataColumns: KEY._col0:string, KEY._col1:float, KEY._col2:double, KEY._col3:timestamp, KEY._col4:boolean, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:float, VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - 
aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDouble(col 8:float) -> float, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:float, col 2:double, col 3:timestamp, col 4:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175D) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] + 
selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 13:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 13:double) -> 15:double) -> 13:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 15:float, DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleColUnaryMinus(col 8:float) -> 20:float, DoubleColDivideDoubleScalar(col 22:double, val 10.175)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 21:double) -> 22:double) -> 21:double, FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 22:double) -> 23:double) -> 22:double) -> 23:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColDivideDoubleScalar(col 24:double, val 10.175)(children: DoubleColUnaryMinus(col 23:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 23:double) -> 24:double) -> 23:double) -> 24:double, DoubleScalarModuloDoubleColumn(val -1.389, col 23:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 25:double)(children: DoubleColDivideLongColumn(col 23:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 23:double) -> 25:double) -> 23:double, IfExprNullCondExpr(col 18:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 23:double) -> 25:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 23:double, DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColModuloDoubleScalar(col 30:double, val 10.175)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 
30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 27:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double, DoubleColUnaryMinus(col 30:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 30:double) -> 34:double Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp) sort order: ++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumnNums: [0, 1, 2, 3] + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumnNums: [4, 12, 14, 13, 15, 8, 19, 20, 21, 22, 11, 24, 25, 23, 29, 28, 31, 34] Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 70aacfcb9e..d0b03b3778 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -92,26 +92,27 @@ STAGE PLANS: predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5, 6, 8, 10] + projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: 
CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) @@ -122,9 +123,9 @@ STAGE PLANS: keyColumnNums: [0, 1, 2, 3, 4, 5, 6] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] + valueColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15, 16] Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) + value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: 
double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -140,7 +141,7 @@ STAGE PLANS: includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double, double] Reducer 2 Reduce Vectorization: enabled: false @@ -148,13 +149,13 @@ STAGE PLANS: enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) + expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, 
_col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out index 61d1345c89..8798ebeda1 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -69,39 +69,40 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator keyColumnNums: [0, 1, 2] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumnNums: [3, 4, 5, 6] Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE 
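[Reviewer note: the sketch below is illustrative and not part of the patch.] Every rewritten expression in these golden files follows the same decomposition applied by HiveAggregateReduceFunctionsRule: a variance-family aggregate is reduced to sum(x), sum(x*x) and count(x) in the map-side Group By, and the final value is reassembled arithmetically in the Select operator, with a CASE WHEN (n = 1) THEN null guard for the sample variants (avg is reduced the same way, appearing as (_col0 / _col1) in the plans below). A minimal Java sketch of that reassembly, assuming double partial sums; the class and method names are hypothetical:

    // Rebuilds variance-family results from the reduced aggregates, mirroring
    // the plan expressions in these q.out diffs:
    //   var_pop(x)  = (sum(x*x) - sum(x)^2 / n) / n
    //   var_samp(x) = (sum(x*x) - sum(x)^2 / n) / (n - 1), NULL when n == 1
    //   stddev_*    = power(var_*, 0.5)
    public final class VarianceRewriteSketch {

      /** (sumSq - sum*sum/n) / n, as in the var_pop expressions. */
      static double varPop(double sumSq, double sum, long n) {
        return (sumSq - (sum * sum) / n) / n;
      }

      /** CASE WHEN (n = 1) THEN null ELSE (sumSq - sum*sum/n) / (n - 1) END. */
      static Double varSamp(double sumSq, double sum, long n) {
        return n == 1 ? null : (sumSq - (sum * sum) / n) / (n - 1);
      }

      /** power(var, 0.5), as emitted for stddev_pop and stddev_samp. */
      static Double stddev(Double var) {
        return var == null ? null : Math.pow(var, 0.5);
      }

      public static void main(String[] args) {
        // x = {1, 2, 3}: sum = 6, sumSq = 14, n = 3
        System.out.println(varPop(14, 6, 3));          // 0.666...
        System.out.println(stddev(varSamp(14, 6, 3))); // 1.0
      }
    }

The n == 1 null guard is why the plans wrap the sample-variance divisor in IfExprNullCondExpr rather than dividing by (n - 1) directly.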
- value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -117,26 +118,51 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 6:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:double, col 2:timestamp + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), 
(9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] + selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: 
false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out index f1ee93662f..99afc2bc29 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -75,25 +75,26 @@ STAGE PLANS: predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble + expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 3, 4, 5] + projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -101,9 +102,9 @@ STAGE PLANS: keyColumnNums: 
[] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -119,26 +120,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 10 + dataColumns: VALUE._col0:bigint, VALUE._col1:bigint, VALUE._col2:double, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:bigint, VALUE._col7:tinyint, VALUE._col8:double, VALUE._col9:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFMinLong(col 7:tinyint) -> tinyint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 
Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 % -563.0D) (type: double), (_col0 + 762.0D) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0D) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) % -563.0D) (type: double), ((_col0 / _col1) + 762.0D) (type: double), _col2 (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), (- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) (type: double), (_col2 - (_col0 / _col1)) (type: double), _col6 (type: bigint), (- (_col2 - (_col0 / _col1))) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / _col5) - 762.0D) (type: double), _col7 (type: tinyint), ((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) (type: double), (_col8 / _col9) (type: double), (((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) - _col2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 12, 13, 2, 14, 11, 16, 6, 15, 17, 7, 20, 18, 19] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 10:double, DoubleColModuloDoubleScalar(col 11:double, val -563.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double, DoubleColAddDoubleScalar(col 11:double, val 762.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 11:double) -> 14:double) -> 11:double) -> 14:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 15:double)(children: DoubleColDivideLongColumn(col 11:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 11:double) -> 15:double) -> 11:double) -> 15:double) -> 11:double, DoubleColSubtractDoubleColumn(col 2:double, col 15:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColSubtractDoubleColumn(col 2:double, col 15:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 15:double) -> 17:double) -> 15:double, DoubleColSubtractDoubleScalar(col 18:double, val 762.0)(children: DoubleColDivideLongColumn(col 17:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, DoubleColAddDoubleColumn(col 18:double, col 19:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 
5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 18:double) -> 19:double) -> 18:double) -> 19:double) -> 18:double, CastLongToDouble(col 7:tinyint) -> 19:double) -> 20:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 18:double, DoubleColSubtractDoubleColumn(col 22:double, col 2:double)(children: DoubleColAddDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 19:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 21:double)(children: DoubleColDivideLongColumn(col 19:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 19:double) -> 21:double) -> 19:double) -> 21:double) -> 19:double, CastLongToDouble(col 7:tinyint) -> 21:double) -> 22:double) -> 19:double + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -198,4 +223,4 @@ WHERE (((ctimestamp1 < ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378778E18 -1.49936299222378778E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378701E18 -64 -1.49936299222378778E18 -5650.1297631138395 -1.49936299222378496E18 +-5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378906E18 -1.49936299222378906E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378829E18 -64 -1.49936299222378906E18 -5650.1297631138395 -1.49936299222378624E18 diff --git a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out index c78de728c5..2bccf64121 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -80,25 +80,26 @@ STAGE PLANS: predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, csmallint, cint, cfloat + expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4] + projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 19:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -106,9 +107,9 @@ STAGE PLANS: keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 1 Data size: 404 Basic 
stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) + valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -124,26 +125,50 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)] + scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3), double, double, double, double, double, double, double, double] Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 14 + dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint, VALUE._col3:double, VALUE._col4:double, VALUE._col5:bigint, VALUE._col6:double, VALUE._col7:double, VALUE._col8:bigint, VALUE._col9:double, VALUE._col10:bigint, VALUE._col11:bigint, VALUE._col12:double, VALUE._col13:double + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumLong(col 10:bigint) -> bigint, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 13:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: mergepartial - outputColumnNames: _col0, 
_col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 - 10.175D) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175D)) (type: double), (- _col1) (type: double), (_col0 % 79.553D) (type: double), (- (_col0 * (_col0 - 10.175D))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175D))) / (_col0 - 10.175D)) (type: double), (- (_col0 - 10.175D)) (type: double), _col4 (type: double), (-3728.0D - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double) + expressions: power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5)) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) % 79.553D) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5) (type: double), (- power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), _col9 (type: double), ((- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) / (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (_col10 / _col11) (type: double), (-3728.0D - power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), power(((_col12 - ((_col13 * _col13) / _col11)) / _col11), 0.5) (type: double), ((_col10 / _col11) / power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [14, 19, 15, 23, 26, 29, 22, 32, 40, 9, 43, 35, 46, 54, 
53, 59] + selectExpressions: FuncPowerDoubleToDouble(col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 15:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 17:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 17:bigint) -> 18:bigint) -> 15:double) -> 14:double, DoubleColSubtractDoubleScalar(col 15:double, val 10.175)(children: FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 21:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 19:double)(children: DoubleColDivideLongColumn(col 15:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 15:double) -> 19:double) -> 15:double, IfExprNullCondExpr(col 18:boolean, null, col 20:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 20:bigint) -> 21:bigint) -> 19:double) -> 15:double) -> 19:double, FuncPowerDoubleToDouble(col 22:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 22:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 15:double) -> 22:double) -> 15:double) -> 22:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 22:double, col 26:double)(children: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 23:double) -> 22:double, IfExprNullCondExpr(col 21:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 22:double, DoubleColSubtractDoubleScalar(col 23:double, val 10.175)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 23:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 26:double)(children: DoubleColDivideLongColumn(col 23:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 23:double) -> 26:double) -> 23:double, IfExprNullCondExpr(col 25:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 23:double) -> 26:double) -> 23:double, DoubleColUnaryMinus(col 22:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 22:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 22:double) -> 26:double) -> 22:double) -> 26:double) -> 22:double) -> 26:double, DoubleColModuloDoubleScalar(col 22:double, val 79.553)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 22:double, col 
31:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 29:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 29:double) -> 22:double, IfExprNullCondExpr(col 28:boolean, null, col 30:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 30:bigint) -> 31:bigint) -> 29:double) -> 22:double) -> 29:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColMultiplyDoubleColumn(col 22:double, col 35:double)(children: FuncPowerDoubleToDouble(col 32:double)(children: DoubleColDivideLongColumn(col 22:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 32:double)(children: DoubleColDivideLongColumn(col 22:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 22:double) -> 32:double) -> 22:double, IfExprNullCondExpr(col 31:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 31:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 32:double) -> 22:double, DoubleColSubtractDoubleScalar(col 32:double, val 10.175)(children: FuncPowerDoubleToDouble(col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 32:double) -> 35:double) -> 32:double, IfExprNullCondExpr(col 34:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 34:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double) -> 32:double) -> 35:double) -> 32:double) -> 22:double, FuncPowerDoubleToDouble(col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 39:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 35:double)(children: DoubleColDivideLongColumn(col 32:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 32:double) -> 35:double) -> 32:double, IfExprNullCondExpr(col 37:boolean, null, col 38:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 38:bigint) -> 39:bigint) -> 35:double) -> 32:double, DoubleColUnaryMinus(col 35:double)(children: FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 35:double, col 42:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 40:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 40:double) -> 35:double, IfExprNullCondExpr(col 39:boolean, null, col 41:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 39:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 41:bigint) -> 42:bigint) -> 40:double) -> 35:double) -> 40:double, DoubleColDivideDoubleColumn(col 35:double, col 46:double)(children: DoubleColUnaryMinus(col 43:double)(children: DoubleColMultiplyDoubleColumn(col 35:double, col 46:double)(children: FuncPowerDoubleToDouble(col 43:double)(children: DoubleColDivideLongColumn(col 35:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 43:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 
1:double, col 1:double) -> 35:double) -> 43:double) -> 35:double, IfExprNullCondExpr(col 42:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 42:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 43:double) -> 35:double, DoubleColSubtractDoubleScalar(col 43:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 43:double) -> 46:double) -> 43:double, IfExprNullCondExpr(col 45:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 45:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 46:double) -> 43:double) -> 46:double) -> 43:double) -> 35:double, DoubleColSubtractDoubleScalar(col 43:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 50:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 43:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 43:double) -> 46:double) -> 43:double, IfExprNullCondExpr(col 48:boolean, null, col 49:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 48:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 49:bigint) -> 50:bigint) -> 46:double) -> 43:double) -> 46:double) -> 43:double, DoubleColUnaryMinus(col 46:double)(children: DoubleColSubtractDoubleScalar(col 35:double, val 10.175)(children: FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 35:double, col 52:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 46:double)(children: DoubleColDivideLongColumn(col 35:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 35:double) -> 46:double) -> 35:double, IfExprNullCondExpr(col 50:boolean, null, col 51:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 50:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 51:bigint) -> 52:bigint) -> 46:double) -> 35:double) -> 46:double) -> 35:double, LongColDivideLongColumn(col 10:bigint, col 11:bigint) -> 46:double, DoubleScalarSubtractDoubleColumn(val -3728.0, col 53:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 53:double, col 56:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 54:double)(children: DoubleColDivideLongColumn(col 53:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 53:double) -> 54:double) -> 53:double, IfExprNullCondExpr(col 52:boolean, null, col 55:bigint)(children: LongColEqualLongScalar(col 2:bigint, val 1) -> 52:boolean, LongColSubtractLongScalar(col 2:bigint, val 1) -> 55:bigint) -> 56:bigint) -> 54:double) -> 53:double) -> 54:double, FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 57:double)(children: DoubleColDivideLongColumn(col 53:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 53:double) -> 57:double) -> 53:double) -> 57:double) -> 53:double, DoubleColDivideDoubleColumn(col 57:double, col 58:double)(children: 
LongColDivideLongColumn(col 10:bigint, col 11:bigint) -> 57:double, FuncPowerDoubleToDouble(col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 61:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 58:double) -> 59:double) -> 58:double, IfExprNullCondExpr(col 56:boolean, null, col 60:bigint)(children: LongColEqualLongScalar(col 8:bigint, val 1) -> 56:boolean, LongColSubtractLongScalar(col 8:bigint, val 1) -> 60:bigint) -> 61:bigint) -> 59:double) -> 58:double) -> 59:double + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -208,4 +233,4 @@ WHERE (((cint <= cfloat) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.34690095515641 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL +0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.3469009551564 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL diff --git a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out index c9246510aa..922eb90555 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -75,17 +75,18 @@ STAGE PLANS: predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double) - outputColumnNames: ctinyint, cint, cdouble + expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 5] + projectedOutputColumnNums: [2, 5, 0, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint) + aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2) Group By Vectorization: - aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> 
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
index c9246510aa..922eb90555 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
@@ -75,17 +75,18 @@ STAGE PLANS:
                     predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean)
                     Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double)
-                      outputColumnNames: ctinyint, cint, cdouble
+                      expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [0, 2, 5]
+                          projectedOutputColumnNums: [2, 5, 0, 13]
+                          selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
                       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint)
+                        aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2)
                         Group By Vectorization:
-                            aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+                            aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint
                             className: VectorGroupByOperator
                             groupByMode: HASH
                             native: false
@@ -93,7 +94,7 @@ STAGE PLANS:
                             projectedOutputColumnNums: [0, 1, 2, 3, 4]
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           sort order: 
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumnNums: [0, 1, 2, 3, 4]
-                          Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint)
+                          Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint)
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
@@ -119,26 +120,50 @@ STAGE PLANS:
                 includeColumns: [0, 1, 2, 3, 5]
                 dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
                 partitionColumnCount: 0
-                scratchColumnTypeNames: []
+                scratchColumnTypeNames: [double]
         Reducer 2
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-                vectorized: false
+                reduceColumnNullOrder: 
+                reduceColumnSortOrder: 
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 5
+                    dataColumns: VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:bigint, VALUE._col4:tinyint
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
             Reduce Operator Tree:
               Group By Operator
-                aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
+                aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), min(VALUE._col4)
+                Group By Vectorization:
+                    aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMinLong(col 4:tinyint) -> tinyint
+                    className: VectorGroupByOperator
+                    groupByMode: MERGEPARTIAL
+                    native: false
+                    vectorProcessingMode: GLOBAL
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4]
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563L) % _col0)
(type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2)) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2))) (type: double) + expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (- power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5)) (type: double), (_col2 / _col3) (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3))) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 5, 6, 7, 9, 8, 11, 15, 14, 13, 18, 4, 4, 19] + selectExpressions: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 5:bigint, LongScalarAddLongColumn(val -3728, col 0:bigint) -> 6:bigint, FuncPowerDoubleToDouble(col 8:double)(children: DoubleColDivideLongColumn(col 7:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 8:double)(children: DoubleColDivideLongColumn(col 7:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 7:double) -> 8:double) -> 7:double) -> 8:double) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: FuncPowerDoubleToDouble(col 9:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 8:double) -> 9:double) -> 8:double) -> 9:double) -> 8:double) -> 9:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 8:double, LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 11:bigint, DoubleColDivideDoubleColumn(col 13:double, col 14:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 13:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 14:double) -> 15:double, DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 13:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 13:double) -> 14:double) -> 13:double) -> 14:double, DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideDoubleColumn(col 13:double, col 16:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 
12:bigint) -> 13:double, LongColSubtractLongColumn(col 10:bigint, col 12:bigint)(children: LongScalarAddLongColumn(val -3728, col 0:bigint) -> 10:bigint, LongColMultiplyLongScalar(col 0:bigint, val -563) -> 12:bigint) -> 18:bigint, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 4:tinyint) -> 16:double, DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 12:bigint)(children: LongColModuloLongColumn(col 10:bigint, col 0:bigint)(children: LongColMultiplyLongScalar(col 0:bigint, val -563) -> 10:bigint) -> 12:bigint) -> 17:double, DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 19:double) -> 20:double) -> 17:double) -> 19:double
+                Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
+                  Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -198,4 +223,4 @@ WHERE (((csmallint >= cint)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--493101012745 277615870175435 -493101016473 136727.7868296355 -136727.7868296355 2298.5515807767374 0 0.0 1.8694487691330246E10 -0.0 -278108971191908 -64 -64 0.0
+-493101012745 277615870175435 -493101016473 136727.78682963562 -136727.78682963562 2298.5515807767374 0 0.0 1.8694487691330276E10 -0.0 -278108971191908 -64 -64 0.0
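[Reviewer note, not part of the patch] The vectorization_4.q.out hunk above shows why the rewrite pays off. Before, the reduce side shipped struct-typed partials and fell back to row mode (notVectorizedReason: "stddev_pop" ... GenericUDAFStdEvaluator); after, the map side emits sum(x*x), sum(x), and count(x), the reducer merges them by plain addition in MERGEPARTIAL mode, and a trailing Select Operator projects the final value, so both stages run as VectorGroupByOperator. A two-stage sketch under those assumptions (names illustrative, not Hive code):

```java
// Sketch of partial/merge/project evaluation for the rewritten stddev_pop.
public final class TwoStageStddevPopSketch {
  // Map side: one partial per split -- {sum(x*x), sum(x), count(x)}.
  static double[] partial(double[] xs) {
    double sumSq = 0d, sum = 0d;
    for (double x : xs) { sumSq += x * x; sum += x; }
    return new double[] {sumSq, sum, xs.length};
  }

  // Reduce side (MERGEPARTIAL): merging is elementwise addition, which is
  // why no struct-typed intermediate is needed any more.
  static double[] merge(double[] a, double[] b) {
    return new double[] {a[0] + b[0], a[1] + b[1], a[2] + b[2]};
  }

  // Final Select Operator: power(((sumSq - sum*sum/n) / n), 0.5).
  static double project(double[] p) {
    double n = p[2];
    return Math.pow((p[0] - (p[1] * p[1]) / n) / n, 0.5);
  }

  public static void main(String[] args) {
    double[] merged = merge(partial(new double[] {2, 4, 4, 4}),
                            partial(new double[] {5, 5, 7, 9}));
    System.out.println(project(merged)); // 2.0, same as the one-pass form
  }
}
```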
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index 61d1345c89..8798ebeda1 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -69,39 +69,40 @@ STAGE PLANS:
                     predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean)
                     Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
-                      outputColumnNames: cdouble, cstring1, ctimestamp1
+                      expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
+                      outputColumnNames: _col0, _col1, _col2, _col3
                       Select Vectorization:
                           className: VectorSelectOperator
                           native: true
-                          projectedOutputColumnNums: [5, 6, 8]
+                          projectedOutputColumnNums: [6, 5, 8, 13]
+                          selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
                       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
-                        aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
+                        aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
                         Group By Vectorization:
-                            aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
+                            aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double
                             className: VectorGroupByOperator
                             groupByMode: HASH
-                            keyExpressions: col 5:double, col 6:string, col 8:timestamp
+                            keyExpressions: col 6:string, col 5:double, col 8:timestamp
                             native: false
                             vectorProcessingMode: HASH
-                            projectedOutputColumnNums: [0, 1, 2]
-                        keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
+                            projectedOutputColumnNums: [0, 1, 2, 3]
+                        keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                         mode: hash
-                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                         Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
-                          key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                          key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                           sort order: +++
-                          Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                          Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                           Reduce Sink Vectorization:
                               className: VectorReduceSinkMultiKeyOperator
                               keyColumnNums: [0, 1, 2]
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                              valueColumnNums: [3, 4, 5]
+                              valueColumnNums: [3, 4, 5, 6]
                           Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
+                          value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
@@ -117,26 +118,51 @@ STAGE PLANS:
                 includeColumns: [5, 6, 7, 8]
                 dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
                 partitionColumnCount: 0
-                scratchColumnTypeNames: []
+                scratchColumnTypeNames: [double]
         Reducer 2
+            Execution mode: vectorized
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
-                vectorized: false
+                reduceColumnNullOrder: aaa
+                reduceColumnSortOrder: +++
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 7
+                    dataColumns: KEY._col0:string, KEY._col1:double, KEY._col2:timestamp, VALUE._col0:bigint, VALUE._col1:double, VALUE._col2:double, VALUE._col3:double
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
-                aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
-                keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
+                aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3)
+                Group By Vectorization:
+                    aggregators: VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col
6:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:string, col 1:double, col 2:timestamp + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3] + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 8, 14, 20, 6, 10, 22, 17] + selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 10:double)(children: DoubleColDivideLongColumn(col 8:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 8:double) -> 10:double) -> 8:double, IfExprNullCondExpr(col 11:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 11:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 10:double) -> 8:double, DoubleColUnaryMinus(col 10:double)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 14:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 14:double) -> 10:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 10:double) -> 
14:double, DoubleColMultiplyDoubleColumn(col 10:double, col 17:double)(children: FuncPowerDoubleToDouble(col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 17:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 17:double) -> 10:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 10:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 20:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 10:double, DecimalColDivideDecimalScalar(col 21:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 21:decimal(19,0)) -> 22:decimal(28,6), FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 17:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double) -> 23:double) -> 17:double, IfExprNullCondExpr(col 19:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 17:double
                 Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
+                  File Sink Vectorization:
+                      className: VectorFileSinkOperator
+                      native: false
                   Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
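[Reviewer note, not part of the patch] The vectorization_9.q.out hunk above covers the sample variants, where the rewrite divides by count - 1 and must guard the single-row case: that is the CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END denominator in the rewritten Select Operator, compiled to IfExprNullCondExpr in the vectorized expressions. A sketch of that guard (illustrative names; a boxed Double stands in for SQL NULL):

```java
// Sketch of the stddev_samp rewrite with the count == 1 null guard.
public final class StddevSampSketch {
  static Double stddevSamp(double[] xs) {
    double sum = 0d, sumSq = 0d;
    long n = 0;
    for (double x : xs) { sum += x; sumSq += x * x; n++; }
    if (n == 1) {
      return null; // CASE WHEN (count = 1) THEN null in the rewritten plan
    }
    return Math.pow((sumSq - (sum * sum) / n) / (n - 1), 0.5);
  }

  public static void main(String[] args) {
    System.out.println(stddevSamp(new double[] {1, 2, 3, 4})); // ~1.2909944
    System.out.println(stddevSamp(new double[] {42}));         // null
  }
}
```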
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
index 01eb4b46a9..830d0f8fb7 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
@@ -171,21 +171,21 @@ STAGE PLANS:
                   alias: alltypes_parquet
                   Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
-                    outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
+                    expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                     Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
-                      keys: ctinyint (type: tinyint)
+                      aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
+                      keys: _col0 (type: tinyint)
                       mode: hash
-                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                       Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: tinyint)
                         Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
+                        value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
@@ -197,25 +197,31 @@ STAGE PLANS:
                 usesVectorUDFAdaptor: false
                 vectorized: true
         Reducer 2
+            Execution mode: vectorized
             Reduce Vectorization:
                 enabled: true
                 enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true
-                notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
-                vectorized: false
+                allNative: false
+                usesVectorUDFAdaptor: false
+                vectorized: true
             Reduce Operator Tree:
               Group By Operator
-                aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+                aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                 Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
+                Select Operator
+                  expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                   Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator
@@ -245,36 +251,36 @@ POSTHOOK: query: select ctinyint,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypes_parquet
 #### A masked pattern was here ####
--1 626923679 -15441 36 -1.0486250072717667 8786.246963933321
+-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323
 -10 626923679 -15384 28 -10.0 8850.451610567823
--11 626923679 -15659 32 -11.0 10453.738567408038
--12 626923679 -16373 22 -12.0 10173.15707541171
--13 626923679 -15446 30 -13.0 8907.942987576693
+-11 626923679 -15659 32 -11.0 10453.73856740804
+-12 626923679 -16373 22 -12.0 10173.157075411711
+-13 626923679 -15446 30 -13.0 8907.942987576691
 -14 626923679 -13884 22 -14.0 10125.818731386042
--15 626923679 -16036 24 -15.0 9450.506254395024
+-15 626923679 -16036 24 -15.0 9450.506254395026
 -16 626923679 -15154 21 -16.0
8884.207393686478 -17 626923679 -15922 19 -17.0 9944.104273894172 -18 626923679 -14863 24 -18.0 9638.430684071413 -19 626923679 -15935 25 -19.0 9967.22240685782 -2 626923679 -16277 20 -2.0 10800.090249507177 --20 626923679 -16126 24 -20.0 9868.92268080106 +-20 626923679 -16126 24 -20.0 9868.922680801063 -21 626923679 -16017 27 -21.0 9480.349236669877 -22 626923679 -14701 22 -22.0 8809.230165774987 -23 626923679 -16355 36 -23.345263230173213 9401.831290253447 -24 626923679 -16311 26 -24.0 9386.736402961187 --25 626923679 -15862 24 -25.0 9778.256724727018 --26 626923679 -15686 15 -26.0 10874.523900405318 +-25 626923679 -15862 24 -25.0 9778.25672472702 +-26 626923679 -15686 15 -26.0 10874.52390040532 -27 626923679 -14984 20 -27.0 8465.29660255097 -28 626923679 -15813 20 -28.0 9616.869413270924 --29 626923679 -14747 26 -29.0 9052.945656011721 +-29 626923679 -14747 26 -29.0 9052.945656011723 -3 626923679 -13632 16 -3.0 8836.215573422822 --30 626923679 -14863 23 -30.0 9193.941914019653 +-30 626923679 -14863 23 -30.0 9193.941914019651 -31 626923679 -15915 22 -31.0 9187.596784112568 --32 626923679 -15866 25 -32.0 9535.546396775915 +-32 626923679 -15866 25 -32.0 9535.546396775917 -33 626923679 -12779 21 -33.0 8854.331159704514 -34 626923679 -15450 29 -34.0 8708.243526705026 -35 626923679 -16059 23 -35.0 10136.580492864763 --36 626923679 -16208 23 -36.0 8773.547684436919 +-36 626923679 -16208 23 -36.0 8773.54768443692 -37 626923679 -14780 17 -37.0 10368.905538788269 -38 626923679 -14914 28 -38.0 8767.375358291503 -39 626923679 -15612 19 -39.0 9765.551806305297 @@ -287,37 +293,37 @@ POSTHOOK: Input: default@alltypes_parquet -45 626923679 -15027 21 -45.0 8567.489593562543 -46 626923679 -12427 21 -46.0 9182.943188188632 -47 626923679 -16096 19 -47.0 9011.009178780589 --48 626923679 -15462 26 -48.0 9913.883371354861 +-48 626923679 -15462 26 -48.0 9913.883371354863 -49 626923679 -14831 23 -49.0 9894.429191738676 -5 626923679 -15780 24 -5.0 10599.227726422314 -50 626923679 -14320 27 -50.0 8548.827748002343 -51 1073680599 -15734 1028 -51.0 9531.569305177045 -52 626923679 -16369 30 -52.0 8625.06871423408 --53 626923679 -15445 19 -53.0 9387.739325499799 --54 626923679 -14815 23 -54.0 9614.154026896626 +-53 626923679 -15445 19 -53.0 9387.7393254998 +-54 626923679 -14815 23 -54.0 9614.154026896624 -55 626923679 -13381 26 -55.0 9157.562103946742 --56 626923679 -11999 33 -56.0 9490.842152672341 --57 626923679 -14893 32 -57.0 8572.083461570477 --58 626923679 -15169 20 -58.0 9549.096672008198 --59 626923679 -15789 28 -59.0 9829.790704244733 +-56 626923679 -11999 33 -56.0 9490.84215267234 +-57 626923679 -14893 32 -57.0 8572.083461570479 +-58 626923679 -15169 20 -58.0 9549.096672008196 +-59 626923679 -15789 28 -59.0 9829.790704244735 -6 626923679 -15980 30 -6.0 10262.829252317424 -60 626923679 -15792 24 -60.0 9892.656196775464 --61 626923679 -15142 22 -61.0 9357.236187870849 --62 626923679 -15992 24 -62.0 9004.593091474135 +-61 626923679 -15142 22 -61.0 9357.23618787085 +-62 626923679 -15992 24 -62.0 9004.593091474137 -63 626923679 -12516 16 -63.0 9263.605837223322 -64 626923679 -15920 21 -64.0 9254.456539277186 --7 626923679 -14584 23 -7.0 9946.605446407746 --8 626923679 -14678 18 -8.0 9976.831992670684 +-7 626923679 -14584 23 -7.0 9946.605446407748 +-8 626923679 -14678 18 -8.0 9976.831992670686 -9 626923679 -15329 31 -9.0 8999.391457373968 0 626923679 -14254 24 0.0 10057.5018088718 -1 626923679 -14610 30 1.0 10016.486277900643 -10 626923679 -15887 26 10.0 9104.820520135108 +1 626923679 -14610 30 1.0 
10016.486277900645 +10 626923679 -15887 26 10.0 9104.82052013511 11 1072654057 -14696 1035 11.0 9531.018991371746 12 626923679 -14642 18 12.0 9696.038286378725 13 626923679 -14771 26 13.0 8128.265919972384 14 626923679 -13367 28 14.0 9074.674998750581 -15 626923679 -16339 28 15.0 9770.473400901916 -16 626923679 -14001 26 16.0 10130.883606275334 +15 626923679 -16339 28 15.0 9770.473400901918 +16 626923679 -14001 26 16.0 10130.883606275338 17 626923679 -16109 22 16.73235294865627 1353416.3383574807 18 626923679 -15779 21 18.0 10820.004053788869 19 626923679 -16049 21 19.0 9423.560227007669 @@ -328,28 +334,28 @@ POSTHOOK: Input: default@alltypes_parquet 23 626923679 -15514 24 23.0 8542.419116415425 24 626923679 -15086 24 24.0 9661.203790645088 25 626923679 -11349 23 25.0 8888.959012093468 -26 626923679 -14516 29 26.0 9123.125508880432 -27 626923679 -14965 24 27.0 9802.871860196345 +26 626923679 -14516 29 26.0 9123.125508880434 +27 626923679 -14965 24 27.0 9802.871860196343 28 626923679 -14455 20 28.0 9283.289383115296 29 626923679 -15892 16 29.0 9874.046501817154 -3 626923679 -16339 30 3.0 10483.526375885149 -30 626923679 -14111 27 30.0 10066.520234676527 +3 626923679 -16339 30 3.0 10483.526375885147 +30 626923679 -14111 27 30.0 10066.520234676529 31 626923679 -15960 24 31.0 10427.970184550613 32 626923679 -14044 24 32.0 8376.464579403413 -33 626923679 -14642 29 40.61776386607777 1304429.5939037625 -34 626923679 -15059 28 34.0 8756.731536033676 +33 626923679 -14642 29 40.61776386607777 1304429.593903763 +34 626923679 -15059 28 34.0 8756.731536033674 35 626923679 -16153 27 35.0 10351.008404963042 36 626923679 -15912 20 36.0 9475.257975138164 37 626923679 -12081 24 37.0 9017.860034890362 38 626923679 -15248 29 38.0 9900.256257785535 -39 626923679 -14887 28 39.0 10513.343644635232 -4 626923679 -15999 29 4.0 9516.189702058042 +39 626923679 -14887 28 39.0 10513.343644635233 +4 626923679 -15999 29 4.0 9516.189702058044 40 626923679 -15861 22 40.0 9283.318678549174 -41 626923679 -13480 21 41.0 9016.291129937847 +41 626923679 -13480 21 41.0 9016.291129937848 42 626923679 -15834 28 42.0 10318.01399719996 43 626923679 -15703 28 43.0 8757.796089055722 44 626923679 -11185 16 44.0 9425.076634933797 -45 626923679 -15228 18 45.0 9459.968668643689 +45 626923679 -15228 18 45.0 9459.968668643687 46 626923679 -15187 22 46.0 9685.908173160062 47 626923679 -16324 22 47.0 9822.220821743611 48 626923679 -16372 29 48.0 10079.286173063345 @@ -360,18 +366,18 @@ POSTHOOK: Input: default@alltypes_parquet 52 626923679 -15450 20 52.0 9261.723648435052 53 626923679 -16217 30 53.0 9895.247408969733 54 626923679 -15245 16 54.0 9789.50878424882 -55 626923679 -15887 21 55.0 9826.38569192808 +55 626923679 -15887 21 55.0 9826.385691928082 56 626923679 -12631 21 56.0 8860.917133763547 57 626923679 -15620 25 57.0 9413.99393840875 58 626923679 -13627 20 58.0 9083.529665947459 -59 626923679 -16076 17 59.0 10117.44967077967 -6 626923679 -15948 30 6.0 9644.247255286113 +59 626923679 -16076 17 59.0 10117.449670779672 +6 626923679 -15948 30 6.0 9644.247255286115 60 626923679 -13606 23 60.0 8346.267436552042 -61 626923679 -15894 29 61.0 8785.714950987198 -62 626923679 -14307 17 62.0 9491.752726667326 +61 626923679 -15894 29 61.0 8785.7149509872 +62 626923679 -14307 17 62.0 9491.752726667324 7 626923679 -15839 25 7.0 10077.151640330823 8 1070764888 -15778 1034 8.0 9562.355155774725 -9 626923679 -13629 25 9.0 10157.217948808622 +9 626923679 -13629 25 9.0 10157.21794880862 NULL 1073418988 -16379 3115 NULL 305051.4870777435 PREHOOK: query: 
explain vectorization select * from alltypes_parquet @@ -492,45 +498,51 @@ STAGE PLANS: alias: alltypes_parquet Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Map Vectorization: enabled: false enabledConditionsNotMet: Row deserialization of vectorized input format not supported IS false, hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat IS false inputFileFormats: org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * 
_col7) / _col8)) / _col8), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -560,36 +572,36 @@ POSTHOOK: query: select ctinyint, POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_parquet #### A masked pattern was here #### --1 626923679 -15441 36 -1.0486250072717667 8786.246963933321 +-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323 -10 626923679 -15384 28 -10.0 8850.451610567823 --11 626923679 -15659 32 -11.0 10453.738567408038 --12 626923679 -16373 22 -12.0 10173.15707541171 --13 626923679 -15446 30 -13.0 8907.942987576693 +-11 626923679 -15659 32 -11.0 10453.73856740804 +-12 626923679 -16373 22 -12.0 10173.157075411711 +-13 626923679 -15446 30 -13.0 8907.942987576691 -14 626923679 -13884 22 -14.0 10125.818731386042 --15 626923679 -16036 24 -15.0 9450.506254395024 +-15 626923679 -16036 24 -15.0 9450.506254395026 -16 626923679 -15154 21 -16.0 8884.207393686478 -17 626923679 -15922 19 -17.0 9944.104273894172 -18 626923679 -14863 24 -18.0 9638.430684071413 -19 626923679 -15935 25 -19.0 9967.22240685782 -2 626923679 -16277 20 -2.0 10800.090249507177 --20 626923679 -16126 24 -20.0 9868.92268080106 +-20 626923679 -16126 24 -20.0 9868.922680801063 -21 626923679 -16017 27 -21.0 9480.349236669877 -22 626923679 -14701 22 -22.0 8809.230165774987 -23 626923679 -16355 36 -23.345263230173213 9401.831290253447 -24 626923679 -16311 26 -24.0 9386.736402961187 --25 626923679 -15862 24 -25.0 9778.256724727018 --26 626923679 -15686 15 -26.0 10874.523900405318 +-25 626923679 -15862 24 -25.0 9778.25672472702 +-26 626923679 -15686 15 -26.0 10874.52390040532 -27 626923679 -14984 20 -27.0 8465.29660255097 -28 626923679 -15813 20 -28.0 9616.869413270924 --29 626923679 -14747 26 -29.0 9052.945656011721 +-29 626923679 -14747 26 -29.0 9052.945656011723 -3 626923679 -13632 16 -3.0 8836.215573422822 --30 626923679 -14863 23 -30.0 9193.941914019653 +-30 626923679 -14863 23 -30.0 9193.941914019651 -31 626923679 -15915 22 -31.0 9187.596784112568 --32 626923679 -15866 25 -32.0 9535.546396775915 +-32 626923679 -15866 25 -32.0 9535.546396775917 -33 626923679 -12779 21 -33.0 8854.331159704514 -34 626923679 -15450 29 -34.0 8708.243526705026 -35 626923679 -16059 23 -35.0 10136.580492864763 --36 626923679 -16208 23 -36.0 8773.547684436919 +-36 626923679 -16208 23 -36.0 8773.54768443692 -37 626923679 -14780 17 -37.0 10368.905538788269 -38 626923679 -14914 28 -38.0 8767.375358291503 -39 626923679 -15612 19 -39.0 9765.551806305297 @@ -602,37 +614,37 @@ POSTHOOK: Input: default@alltypes_parquet -45 626923679 -15027 21 -45.0 8567.489593562543 -46 626923679 -12427 21 -46.0 9182.943188188632 -47 626923679 -16096 19 -47.0 9011.009178780589 --48 626923679 -15462 26 -48.0 9913.883371354861 +-48 626923679 -15462 26 -48.0 9913.883371354863 -49 626923679 -14831 23 -49.0 9894.429191738676 -5 626923679 -15780 24 -5.0 10599.227726422314 -50 
626923679 -14320 27 -50.0 8548.827748002343 -51 1073680599 -15734 1028 -51.0 9531.569305177045 -52 626923679 -16369 30 -52.0 8625.06871423408 --53 626923679 -15445 19 -53.0 9387.739325499799 --54 626923679 -14815 23 -54.0 9614.154026896626 +-53 626923679 -15445 19 -53.0 9387.7393254998 +-54 626923679 -14815 23 -54.0 9614.154026896624 -55 626923679 -13381 26 -55.0 9157.562103946742 --56 626923679 -11999 33 -56.0 9490.842152672341 --57 626923679 -14893 32 -57.0 8572.083461570477 --58 626923679 -15169 20 -58.0 9549.096672008198 --59 626923679 -15789 28 -59.0 9829.790704244733 +-56 626923679 -11999 33 -56.0 9490.84215267234 +-57 626923679 -14893 32 -57.0 8572.083461570479 +-58 626923679 -15169 20 -58.0 9549.096672008196 +-59 626923679 -15789 28 -59.0 9829.790704244735 -6 626923679 -15980 30 -6.0 10262.829252317424 -60 626923679 -15792 24 -60.0 9892.656196775464 --61 626923679 -15142 22 -61.0 9357.236187870849 --62 626923679 -15992 24 -62.0 9004.593091474135 +-61 626923679 -15142 22 -61.0 9357.23618787085 +-62 626923679 -15992 24 -62.0 9004.593091474137 -63 626923679 -12516 16 -63.0 9263.605837223322 -64 626923679 -15920 21 -64.0 9254.456539277186 --7 626923679 -14584 23 -7.0 9946.605446407746 --8 626923679 -14678 18 -8.0 9976.831992670684 +-7 626923679 -14584 23 -7.0 9946.605446407748 +-8 626923679 -14678 18 -8.0 9976.831992670686 -9 626923679 -15329 31 -9.0 8999.391457373968 0 626923679 -14254 24 0.0 10057.5018088718 -1 626923679 -14610 30 1.0 10016.486277900643 -10 626923679 -15887 26 10.0 9104.820520135108 +1 626923679 -14610 30 1.0 10016.486277900645 +10 626923679 -15887 26 10.0 9104.82052013511 11 1072654057 -14696 1035 11.0 9531.018991371746 12 626923679 -14642 18 12.0 9696.038286378725 13 626923679 -14771 26 13.0 8128.265919972384 14 626923679 -13367 28 14.0 9074.674998750581 -15 626923679 -16339 28 15.0 9770.473400901916 -16 626923679 -14001 26 16.0 10130.883606275334 +15 626923679 -16339 28 15.0 9770.473400901918 +16 626923679 -14001 26 16.0 10130.883606275338 17 626923679 -16109 22 16.73235294865627 1353416.3383574807 18 626923679 -15779 21 18.0 10820.004053788869 19 626923679 -16049 21 19.0 9423.560227007669 @@ -643,28 +655,28 @@ POSTHOOK: Input: default@alltypes_parquet 23 626923679 -15514 24 23.0 8542.419116415425 24 626923679 -15086 24 24.0 9661.203790645088 25 626923679 -11349 23 25.0 8888.959012093468 -26 626923679 -14516 29 26.0 9123.125508880432 -27 626923679 -14965 24 27.0 9802.871860196345 +26 626923679 -14516 29 26.0 9123.125508880434 +27 626923679 -14965 24 27.0 9802.871860196343 28 626923679 -14455 20 28.0 9283.289383115296 29 626923679 -15892 16 29.0 9874.046501817154 -3 626923679 -16339 30 3.0 10483.526375885149 -30 626923679 -14111 27 30.0 10066.520234676527 +3 626923679 -16339 30 3.0 10483.526375885147 +30 626923679 -14111 27 30.0 10066.520234676529 31 626923679 -15960 24 31.0 10427.970184550613 32 626923679 -14044 24 32.0 8376.464579403413 -33 626923679 -14642 29 40.61776386607777 1304429.5939037625 -34 626923679 -15059 28 34.0 8756.731536033676 +33 626923679 -14642 29 40.61776386607777 1304429.593903763 +34 626923679 -15059 28 34.0 8756.731536033674 35 626923679 -16153 27 35.0 10351.008404963042 36 626923679 -15912 20 36.0 9475.257975138164 37 626923679 -12081 24 37.0 9017.860034890362 38 626923679 -15248 29 38.0 9900.256257785535 -39 626923679 -14887 28 39.0 10513.343644635232 -4 626923679 -15999 29 4.0 9516.189702058042 +39 626923679 -14887 28 39.0 10513.343644635233 +4 626923679 -15999 29 4.0 9516.189702058044 40 626923679 -15861 22 40.0 9283.318678549174 -41 
626923679 -13480 21 41.0 9016.291129937847 +41 626923679 -13480 21 41.0 9016.291129937848 42 626923679 -15834 28 42.0 10318.01399719996 43 626923679 -15703 28 43.0 8757.796089055722 44 626923679 -11185 16 44.0 9425.076634933797 -45 626923679 -15228 18 45.0 9459.968668643689 +45 626923679 -15228 18 45.0 9459.968668643687 46 626923679 -15187 22 46.0 9685.908173160062 47 626923679 -16324 22 47.0 9822.220821743611 48 626923679 -16372 29 48.0 10079.286173063345 @@ -675,18 +687,18 @@ POSTHOOK: Input: default@alltypes_parquet 52 626923679 -15450 20 52.0 9261.723648435052 53 626923679 -16217 30 53.0 9895.247408969733 54 626923679 -15245 16 54.0 9789.50878424882 -55 626923679 -15887 21 55.0 9826.38569192808 +55 626923679 -15887 21 55.0 9826.385691928082 56 626923679 -12631 21 56.0 8860.917133763547 57 626923679 -15620 25 57.0 9413.99393840875 58 626923679 -13627 20 58.0 9083.529665947459 -59 626923679 -16076 17 59.0 10117.44967077967 -6 626923679 -15948 30 6.0 9644.247255286113 +59 626923679 -16076 17 59.0 10117.449670779672 +6 626923679 -15948 30 6.0 9644.247255286115 60 626923679 -13606 23 60.0 8346.267436552042 -61 626923679 -15894 29 61.0 8785.714950987198 -62 626923679 -14307 17 62.0 9491.752726667326 +61 626923679 -15894 29 61.0 8785.7149509872 +62 626923679 -14307 17 62.0 9491.752726667324 7 626923679 -15839 25 7.0 10077.151640330823 8 1070764888 -15778 1034 8.0 9562.355155774725 -9 626923679 -13629 25 9.0 10157.217948808622 +9 626923679 -13629 25 9.0 10157.21794880862 NULL 1073418988 -16379 3115 NULL 305051.4870777435 PREHOOK: query: explain vectorization select * from alltypes_parquet @@ -813,21 +825,21 @@ STAGE PLANS: alias: alltypes_parquet Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -839,25 +851,31 @@ STAGE PLANS: usesVectorUDFAdaptor: false 
vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -887,36 +905,36 @@ POSTHOOK: query: select ctinyint, POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_parquet #### A masked pattern was here #### --1 626923679 -15441 36 -1.0486250072717667 8786.246963933321 +-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323 -10 626923679 -15384 28 -10.0 8850.451610567823 --11 626923679 -15659 32 -11.0 10453.738567408038 --12 626923679 -16373 22 -12.0 10173.15707541171 --13 626923679 -15446 30 -13.0 8907.942987576693 +-11 626923679 -15659 32 -11.0 10453.73856740804 +-12 626923679 -16373 22 -12.0 10173.157075411711 +-13 626923679 -15446 30 -13.0 8907.942987576691 -14 626923679 -13884 22 -14.0 10125.818731386042 --15 626923679 -16036 24 -15.0 9450.506254395024 +-15 626923679 -16036 24 -15.0 9450.506254395026 -16 626923679 -15154 21 -16.0 8884.207393686478 -17 626923679 -15922 19 -17.0 9944.104273894172 -18 626923679 -14863 24 -18.0 9638.430684071413 -19 626923679 -15935 25 -19.0 9967.22240685782 -2 626923679 -16277 20 -2.0 10800.090249507177 --20 626923679 -16126 24 -20.0 9868.92268080106 +-20 626923679 -16126 24 -20.0 9868.922680801063 -21 626923679 -16017 27 -21.0 9480.349236669877 -22 626923679 -14701 22 -22.0 8809.230165774987 -23 626923679 -16355 36 -23.345263230173213 9401.831290253447 -24 626923679 -16311 26 -24.0 9386.736402961187 --25 626923679 -15862 24 -25.0 9778.256724727018 --26 626923679 -15686 15 -26.0 10874.523900405318 +-25 626923679 -15862 24 -25.0 9778.25672472702 +-26 
626923679 -15686 15 -26.0 10874.52390040532 -27 626923679 -14984 20 -27.0 8465.29660255097 -28 626923679 -15813 20 -28.0 9616.869413270924 --29 626923679 -14747 26 -29.0 9052.945656011721 +-29 626923679 -14747 26 -29.0 9052.945656011723 -3 626923679 -13632 16 -3.0 8836.215573422822 --30 626923679 -14863 23 -30.0 9193.941914019653 +-30 626923679 -14863 23 -30.0 9193.941914019651 -31 626923679 -15915 22 -31.0 9187.596784112568 --32 626923679 -15866 25 -32.0 9535.546396775915 +-32 626923679 -15866 25 -32.0 9535.546396775917 -33 626923679 -12779 21 -33.0 8854.331159704514 -34 626923679 -15450 29 -34.0 8708.243526705026 -35 626923679 -16059 23 -35.0 10136.580492864763 --36 626923679 -16208 23 -36.0 8773.547684436919 +-36 626923679 -16208 23 -36.0 8773.54768443692 -37 626923679 -14780 17 -37.0 10368.905538788269 -38 626923679 -14914 28 -38.0 8767.375358291503 -39 626923679 -15612 19 -39.0 9765.551806305297 @@ -929,37 +947,37 @@ POSTHOOK: Input: default@alltypes_parquet -45 626923679 -15027 21 -45.0 8567.489593562543 -46 626923679 -12427 21 -46.0 9182.943188188632 -47 626923679 -16096 19 -47.0 9011.009178780589 --48 626923679 -15462 26 -48.0 9913.883371354861 +-48 626923679 -15462 26 -48.0 9913.883371354863 -49 626923679 -14831 23 -49.0 9894.429191738676 -5 626923679 -15780 24 -5.0 10599.227726422314 -50 626923679 -14320 27 -50.0 8548.827748002343 -51 1073680599 -15734 1028 -51.0 9531.569305177045 -52 626923679 -16369 30 -52.0 8625.06871423408 --53 626923679 -15445 19 -53.0 9387.739325499799 --54 626923679 -14815 23 -54.0 9614.154026896626 +-53 626923679 -15445 19 -53.0 9387.7393254998 +-54 626923679 -14815 23 -54.0 9614.154026896624 -55 626923679 -13381 26 -55.0 9157.562103946742 --56 626923679 -11999 33 -56.0 9490.842152672341 --57 626923679 -14893 32 -57.0 8572.083461570477 --58 626923679 -15169 20 -58.0 9549.096672008198 --59 626923679 -15789 28 -59.0 9829.790704244733 +-56 626923679 -11999 33 -56.0 9490.84215267234 +-57 626923679 -14893 32 -57.0 8572.083461570479 +-58 626923679 -15169 20 -58.0 9549.096672008196 +-59 626923679 -15789 28 -59.0 9829.790704244735 -6 626923679 -15980 30 -6.0 10262.829252317424 -60 626923679 -15792 24 -60.0 9892.656196775464 --61 626923679 -15142 22 -61.0 9357.236187870849 --62 626923679 -15992 24 -62.0 9004.593091474135 +-61 626923679 -15142 22 -61.0 9357.23618787085 +-62 626923679 -15992 24 -62.0 9004.593091474137 -63 626923679 -12516 16 -63.0 9263.605837223322 -64 626923679 -15920 21 -64.0 9254.456539277186 --7 626923679 -14584 23 -7.0 9946.605446407746 --8 626923679 -14678 18 -8.0 9976.831992670684 +-7 626923679 -14584 23 -7.0 9946.605446407748 +-8 626923679 -14678 18 -8.0 9976.831992670686 -9 626923679 -15329 31 -9.0 8999.391457373968 0 626923679 -14254 24 0.0 10057.5018088718 -1 626923679 -14610 30 1.0 10016.486277900643 -10 626923679 -15887 26 10.0 9104.820520135108 +1 626923679 -14610 30 1.0 10016.486277900645 +10 626923679 -15887 26 10.0 9104.82052013511 11 1072654057 -14696 1035 11.0 9531.018991371746 12 626923679 -14642 18 12.0 9696.038286378725 13 626923679 -14771 26 13.0 8128.265919972384 14 626923679 -13367 28 14.0 9074.674998750581 -15 626923679 -16339 28 15.0 9770.473400901916 -16 626923679 -14001 26 16.0 10130.883606275334 +15 626923679 -16339 28 15.0 9770.473400901918 +16 626923679 -14001 26 16.0 10130.883606275338 17 626923679 -16109 22 16.73235294865627 1353416.3383574807 18 626923679 -15779 21 18.0 10820.004053788869 19 626923679 -16049 21 19.0 9423.560227007669 @@ -970,28 +988,28 @@ POSTHOOK: Input: default@alltypes_parquet 23 626923679 
-15514 24 23.0 8542.419116415425 24 626923679 -15086 24 24.0 9661.203790645088 25 626923679 -11349 23 25.0 8888.959012093468 -26 626923679 -14516 29 26.0 9123.125508880432 -27 626923679 -14965 24 27.0 9802.871860196345 +26 626923679 -14516 29 26.0 9123.125508880434 +27 626923679 -14965 24 27.0 9802.871860196343 28 626923679 -14455 20 28.0 9283.289383115296 29 626923679 -15892 16 29.0 9874.046501817154 -3 626923679 -16339 30 3.0 10483.526375885149 -30 626923679 -14111 27 30.0 10066.520234676527 +3 626923679 -16339 30 3.0 10483.526375885147 +30 626923679 -14111 27 30.0 10066.520234676529 31 626923679 -15960 24 31.0 10427.970184550613 32 626923679 -14044 24 32.0 8376.464579403413 -33 626923679 -14642 29 40.61776386607777 1304429.5939037625 -34 626923679 -15059 28 34.0 8756.731536033676 +33 626923679 -14642 29 40.61776386607777 1304429.593903763 +34 626923679 -15059 28 34.0 8756.731536033674 35 626923679 -16153 27 35.0 10351.008404963042 36 626923679 -15912 20 36.0 9475.257975138164 37 626923679 -12081 24 37.0 9017.860034890362 38 626923679 -15248 29 38.0 9900.256257785535 -39 626923679 -14887 28 39.0 10513.343644635232 -4 626923679 -15999 29 4.0 9516.189702058042 +39 626923679 -14887 28 39.0 10513.343644635233 +4 626923679 -15999 29 4.0 9516.189702058044 40 626923679 -15861 22 40.0 9283.318678549174 -41 626923679 -13480 21 41.0 9016.291129937847 +41 626923679 -13480 21 41.0 9016.291129937848 42 626923679 -15834 28 42.0 10318.01399719996 43 626923679 -15703 28 43.0 8757.796089055722 44 626923679 -11185 16 44.0 9425.076634933797 -45 626923679 -15228 18 45.0 9459.968668643689 +45 626923679 -15228 18 45.0 9459.968668643687 46 626923679 -15187 22 46.0 9685.908173160062 47 626923679 -16324 22 47.0 9822.220821743611 48 626923679 -16372 29 48.0 10079.286173063345 @@ -1002,18 +1020,18 @@ POSTHOOK: Input: default@alltypes_parquet 52 626923679 -15450 20 52.0 9261.723648435052 53 626923679 -16217 30 53.0 9895.247408969733 54 626923679 -15245 16 54.0 9789.50878424882 -55 626923679 -15887 21 55.0 9826.38569192808 +55 626923679 -15887 21 55.0 9826.385691928082 56 626923679 -12631 21 56.0 8860.917133763547 57 626923679 -15620 25 57.0 9413.99393840875 58 626923679 -13627 20 58.0 9083.529665947459 -59 626923679 -16076 17 59.0 10117.44967077967 -6 626923679 -15948 30 6.0 9644.247255286113 +59 626923679 -16076 17 59.0 10117.449670779672 +6 626923679 -15948 30 6.0 9644.247255286115 60 626923679 -13606 23 60.0 8346.267436552042 -61 626923679 -15894 29 61.0 8785.714950987198 -62 626923679 -14307 17 62.0 9491.752726667326 +61 626923679 -15894 29 61.0 8785.7149509872 +62 626923679 -14307 17 62.0 9491.752726667324 7 626923679 -15839 25 7.0 10077.151640330823 8 1070764888 -15778 1034 8.0 9562.355155774725 -9 626923679 -13629 25 9.0 10157.217948808622 +9 626923679 -13629 25 9.0 10157.21794880862 NULL 1073418988 -16379 3115 NULL 305051.4870777435 PREHOOK: query: create table if not exists alltypes_orc ( cint int, @@ -1182,45 +1200,51 @@ STAGE PLANS: alias: alltypes_orc Statistics: Num rows: 12288 Data size: 1110042 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string) - outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1 + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 12288 Data size: 1110042 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) - keys: ctinyint (type: tinyint) + aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5) + keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 12288 Data size: 1110042 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 1110042 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint) Map Vectorization: enabled: false enabledConditionsNotMet: hive.vectorized.use.vectorized.input.format IS true AND hive.vectorized.input.format.excludes NOT CONTAINS org.apache.hadoop.hive.ql.io.orc.OrcInputFormat IS false inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7) keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 6144 Data size: 555021 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 6144 Data size: 555021 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6144 Data size: 555021 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch 
Operator @@ -1250,36 +1274,36 @@ POSTHOOK: query: select ctinyint, POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypes_orc #### A masked pattern was here #### --1 626923679 -15441 36 -1.0486250072717667 8786.246963933321 +-1 626923679 -15441 36 -1.0486250072717667 8786.246963933323 -10 626923679 -15384 28 -10.0 8850.451610567823 --11 626923679 -15659 32 -11.0 10453.738567408038 --12 626923679 -16373 22 -12.0 10173.15707541171 --13 626923679 -15446 30 -13.0 8907.942987576693 +-11 626923679 -15659 32 -11.0 10453.73856740804 +-12 626923679 -16373 22 -12.0 10173.157075411711 +-13 626923679 -15446 30 -13.0 8907.942987576691 -14 626923679 -13884 22 -14.0 10125.818731386042 --15 626923679 -16036 24 -15.0 9450.506254395024 +-15 626923679 -16036 24 -15.0 9450.506254395026 -16 626923679 -15154 21 -16.0 8884.207393686478 -17 626923679 -15922 19 -17.0 9944.104273894172 -18 626923679 -14863 24 -18.0 9638.430684071413 -19 626923679 -15935 25 -19.0 9967.22240685782 -2 626923679 -16277 20 -2.0 10800.090249507177 --20 626923679 -16126 24 -20.0 9868.92268080106 +-20 626923679 -16126 24 -20.0 9868.922680801063 -21 626923679 -16017 27 -21.0 9480.349236669877 -22 626923679 -14701 22 -22.0 8809.230165774987 -23 626923679 -16355 36 -23.345263230173213 9401.831290253447 -24 626923679 -16311 26 -24.0 9386.736402961187 --25 626923679 -15862 24 -25.0 9778.256724727018 --26 626923679 -15686 15 -26.0 10874.523900405318 +-25 626923679 -15862 24 -25.0 9778.25672472702 +-26 626923679 -15686 15 -26.0 10874.52390040532 -27 626923679 -14984 20 -27.0 8465.29660255097 -28 626923679 -15813 20 -28.0 9616.869413270924 --29 626923679 -14747 26 -29.0 9052.945656011721 +-29 626923679 -14747 26 -29.0 9052.945656011723 -3 626923679 -13632 16 -3.0 8836.215573422822 --30 626923679 -14863 23 -30.0 9193.941914019653 +-30 626923679 -14863 23 -30.0 9193.941914019651 -31 626923679 -15915 22 -31.0 9187.596784112568 --32 626923679 -15866 25 -32.0 9535.546396775915 +-32 626923679 -15866 25 -32.0 9535.546396775917 -33 626923679 -12779 21 -33.0 8854.331159704514 -34 626923679 -15450 29 -34.0 8708.243526705026 -35 626923679 -16059 23 -35.0 10136.580492864763 --36 626923679 -16208 23 -36.0 8773.547684436919 +-36 626923679 -16208 23 -36.0 8773.54768443692 -37 626923679 -14780 17 -37.0 10368.905538788269 -38 626923679 -14914 28 -38.0 8767.375358291503 -39 626923679 -15612 19 -39.0 9765.551806305297 @@ -1292,37 +1316,37 @@ POSTHOOK: Input: default@alltypes_orc -45 626923679 -15027 21 -45.0 8567.489593562543 -46 626923679 -12427 21 -46.0 9182.943188188632 -47 626923679 -16096 19 -47.0 9011.009178780589 --48 626923679 -15462 26 -48.0 9913.883371354861 +-48 626923679 -15462 26 -48.0 9913.883371354863 -49 626923679 -14831 23 -49.0 9894.429191738676 -5 626923679 -15780 24 -5.0 10599.227726422314 -50 626923679 -14320 27 -50.0 8548.827748002343 -51 1073680599 -15734 1028 -51.0 9531.569305177045 -52 626923679 -16369 30 -52.0 8625.06871423408 --53 626923679 -15445 19 -53.0 9387.739325499799 --54 626923679 -14815 23 -54.0 9614.154026896626 +-53 626923679 -15445 19 -53.0 9387.7393254998 +-54 626923679 -14815 23 -54.0 9614.154026896624 -55 626923679 -13381 26 -55.0 9157.562103946742 --56 626923679 -11999 33 -56.0 9490.842152672341 --57 626923679 -14893 32 -57.0 8572.083461570477 --58 626923679 -15169 20 -58.0 9549.096672008198 --59 626923679 -15789 28 -59.0 9829.790704244733 +-56 626923679 -11999 33 -56.0 9490.84215267234 +-57 626923679 -14893 32 -57.0 8572.083461570479 +-58 626923679 -15169 20 -58.0 9549.096672008196 +-59 626923679 -15789 28 -59.0 
9829.790704244735 -6 626923679 -15980 30 -6.0 10262.829252317424 -60 626923679 -15792 24 -60.0 9892.656196775464 --61 626923679 -15142 22 -61.0 9357.236187870849 --62 626923679 -15992 24 -62.0 9004.593091474135 +-61 626923679 -15142 22 -61.0 9357.23618787085 +-62 626923679 -15992 24 -62.0 9004.593091474137 -63 626923679 -12516 16 -63.0 9263.605837223322 -64 626923679 -15920 21 -64.0 9254.456539277186 --7 626923679 -14584 23 -7.0 9946.605446407746 --8 626923679 -14678 18 -8.0 9976.831992670684 +-7 626923679 -14584 23 -7.0 9946.605446407748 +-8 626923679 -14678 18 -8.0 9976.831992670686 -9 626923679 -15329 31 -9.0 8999.391457373968 0 626923679 -14254 24 0.0 10057.5018088718 -1 626923679 -14610 30 1.0 10016.486277900643 -10 626923679 -15887 26 10.0 9104.820520135108 +1 626923679 -14610 30 1.0 10016.486277900645 +10 626923679 -15887 26 10.0 9104.82052013511 11 1072654057 -14696 1035 11.0 9531.018991371746 12 626923679 -14642 18 12.0 9696.038286378725 13 626923679 -14771 26 13.0 8128.265919972384 14 626923679 -13367 28 14.0 9074.674998750581 -15 626923679 -16339 28 15.0 9770.473400901916 -16 626923679 -14001 26 16.0 10130.883606275334 +15 626923679 -16339 28 15.0 9770.473400901918 +16 626923679 -14001 26 16.0 10130.883606275338 17 626923679 -16109 22 16.73235294865627 1353416.3383574807 18 626923679 -15779 21 18.0 10820.004053788869 19 626923679 -16049 21 19.0 9423.560227007669 @@ -1333,28 +1357,28 @@ POSTHOOK: Input: default@alltypes_orc 23 626923679 -15514 24 23.0 8542.419116415425 24 626923679 -15086 24 24.0 9661.203790645088 25 626923679 -11349 23 25.0 8888.959012093468 -26 626923679 -14516 29 26.0 9123.125508880432 -27 626923679 -14965 24 27.0 9802.871860196345 +26 626923679 -14516 29 26.0 9123.125508880434 +27 626923679 -14965 24 27.0 9802.871860196343 28 626923679 -14455 20 28.0 9283.289383115296 29 626923679 -15892 16 29.0 9874.046501817154 -3 626923679 -16339 30 3.0 10483.526375885149 -30 626923679 -14111 27 30.0 10066.520234676527 +3 626923679 -16339 30 3.0 10483.526375885147 +30 626923679 -14111 27 30.0 10066.520234676529 31 626923679 -15960 24 31.0 10427.970184550613 32 626923679 -14044 24 32.0 8376.464579403413 -33 626923679 -14642 29 40.61776386607777 1304429.5939037625 -34 626923679 -15059 28 34.0 8756.731536033676 +33 626923679 -14642 29 40.61776386607777 1304429.593903763 +34 626923679 -15059 28 34.0 8756.731536033674 35 626923679 -16153 27 35.0 10351.008404963042 36 626923679 -15912 20 36.0 9475.257975138164 37 626923679 -12081 24 37.0 9017.860034890362 38 626923679 -15248 29 38.0 9900.256257785535 -39 626923679 -14887 28 39.0 10513.343644635232 -4 626923679 -15999 29 4.0 9516.189702058042 +39 626923679 -14887 28 39.0 10513.343644635233 +4 626923679 -15999 29 4.0 9516.189702058044 40 626923679 -15861 22 40.0 9283.318678549174 -41 626923679 -13480 21 41.0 9016.291129937847 +41 626923679 -13480 21 41.0 9016.291129937848 42 626923679 -15834 28 42.0 10318.01399719996 43 626923679 -15703 28 43.0 8757.796089055722 44 626923679 -11185 16 44.0 9425.076634933797 -45 626923679 -15228 18 45.0 9459.968668643689 +45 626923679 -15228 18 45.0 9459.968668643687 46 626923679 -15187 22 46.0 9685.908173160062 47 626923679 -16324 22 47.0 9822.220821743611 48 626923679 -16372 29 48.0 10079.286173063345 @@ -1365,18 +1389,18 @@ POSTHOOK: Input: default@alltypes_orc 52 626923679 -15450 20 52.0 9261.723648435052 53 626923679 -16217 30 53.0 9895.247408969733 54 626923679 -15245 16 54.0 9789.50878424882 -55 626923679 -15887 21 55.0 9826.38569192808 +55 626923679 -15887 21 55.0 9826.385691928082 56 
626923679 -12631 21 56.0 8860.917133763547
 57 626923679 -15620 25 57.0 9413.99393840875
 58 626923679 -13627 20 58.0 9083.529665947459
-59 626923679 -16076 17 59.0 10117.44967077967
-6 626923679 -15948 30 6.0 9644.247255286113
+59 626923679 -16076 17 59.0 10117.449670779672
+6 626923679 -15948 30 6.0 9644.247255286115
 60 626923679 -13606 23 60.0 8346.267436552042
-61 626923679 -15894 29 61.0 8785.714950987198
-62 626923679 -14307 17 62.0 9491.752726667326
+61 626923679 -15894 29 61.0 8785.7149509872
+62 626923679 -14307 17 62.0 9491.752726667324
 7 626923679 -15839 25 7.0 10077.151640330823
 8 1070764888 -15778 1034 8.0 9562.355155774725
-9 626923679 -13629 25 9.0 10157.217948808622
+9 626923679 -13629 25 9.0 10157.21794880862
 NULL 1073418988 -16379 3115 NULL 305051.4870777435
 PREHOOK: query: create table orcTbl (t1 tinyint, t2 tinyint) stored as orc
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_not.q.out b/ql/src/test/results/clientpositive/spark/vectorization_not.q.out
index b5587ba4d0..cf92a6f694 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_not.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_not.q.out
@@ -55,4 +55,4 @@ WHERE (((cstring2 LIKE '%b%')
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593627E9 3.875716535945533E8 0.0 2.0634715172019392E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0516820315185745E9 -2.0634715172019392E18 1.5020929380914048E17 -64 64
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
index 6d525518cb..0cf6955d2b 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
@@ -32,14 +32,14 @@ STAGE PLANS:
                     outputColumnNames: cbigint
                     Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
-                      aggregations: avg(cbigint)
+                      aggregations: sum(cbigint), count(cbigint)
                       mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         sort order:
-                        Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: struct)
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: bigint), _col1 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
@@ -60,17 +60,21 @@ STAGE PLANS:
                 vectorized: true
             Reduce Operator Tree:
               Group By Operator
-                aggregations: avg(VALUE._col0)
+                aggregations: sum(VALUE._col0), count(VALUE._col1)
                 mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                  table:
-                    input format:
org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out index c04f200269..eff600e0b0 100644 --- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out @@ -102,33 +102,34 @@ STAGE PLANS: predicate: (((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble + expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4, 5] + projectedOutputColumnNums: [2, 5, 1, 4, 0, 13, 18, 16, 20] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 17:double) -> 18:double, CastLongToDouble(col 1:smallint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 17:double, CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 1:smallint) -> struct 
aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFCount(col 1:smallint) -> bigint + aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: bigint), _col10 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -140,24 +141,41 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_samp(VALUE._col3), var_samp(VALUE._col4), avg(VALUE._col5), stddev_samp(VALUE._col6), min(VALUE._col7), count(VALUE._col8) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), min(VALUE._col10) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, 
VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFMinLong(col 10:tinyint) -> tinyint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 + -3728.0D) (type: double), (- (_col0 + -3728.0D)) (type: double), (- (- (_col0 + -3728.0D))) (type: double), ((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) (type: double), _col1 (type: double), (- _col0) (type: double), _col2 (type: double), (((- (- (_col0 + -3728.0D))) * (_col0 + -3728.0D)) * (- (- (_col0 + -3728.0D)))) (type: double), _col3 (type: double), (- _col2) (type: double), (_col2 - (- (- (_col0 + -3728.0D)))) (type: double), ((_col2 - (- (- (_col0 + -3728.0D)))) * _col2) (type: double), _col4 (type: double), _col5 (type: double), (10.175D - _col4) (type: double), (- (10.175D - _col4)) (type: double), ((- _col2) / -563.0D) (type: double), _col6 (type: double), (- ((- _col2) / -563.0D)) (type: double), (_col0 / _col1) (type: double), _col7 (type: tinyint), _col8 (type: bigint), (UDFToDouble(_col7) / ((- _col2) / -563.0D)) (type: double), (- (_col0 / _col1)) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) + -3728.0D) (type: double), (- ((_col0 / _col1) + -3728.0D)) (type: double), (- (- ((_col0 / _col1) + -3728.0D))) (type: double), ((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) (type: double), _col2 (type: double), (- (_col0 / _col1)) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) (type: double), (((- (- ((_col0 / _col1) + -3728.0D))) * ((_col0 / _col1) + -3728.0D)) * (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), (power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) (type: double), ((power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5) - (- (- ((_col0 / _col1) + -3728.0D)))) * power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) (type: double), ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), (_col8 / _col9) (type: double), (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: double), (- (10.175D - ((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END))) (type: double), ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D) (type: double), power(((_col3 - ((_col4 * _col4) / _col1)) / CASE WHEN ((_col1 = 1L)) 
THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), (- ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), ((_col0 / _col1) / _col2) (type: double), _col10 (type: tinyint), _col7 (type: bigint), (UDFToDouble(_col10) / ((- power(((_col3 - ((_col4 * _col4) / _col1)) / _col1), 0.5)) / -563.0D)) (type: double), (- ((_col0 / _col1) / _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [11, 13, 12, 15, 14, 2, 17, 16, 19, 18, 24, 25, 27, 26, 20, 30, 34, 31, 37, 41, 42, 10, 7, 44, 38] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double, DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColAddDoubleScalar(col 12:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 15:double) -> 14:double) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: DoubleColUnaryMinus(col 14:double)(children: DoubleColUnaryMinus(col 16:double)(children: DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 16:double) -> 14:double) -> 16:double, DoubleColAddDoubleScalar(col 14:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 14:double) -> 17:double) -> 14:double, DoubleColUnaryMinus(col 16:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 16:double) -> 17:double, FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 18:double, col 20:double)(children: DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleColUnaryMinus(col 19:double)(children: DoubleColAddDoubleScalar(col 18:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 19:double) -> 18:double) -> 19:double, DoubleColAddDoubleScalar(col 18:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 18:double) -> 20:double) -> 18:double, DoubleColUnaryMinus(col 19:double)(children: DoubleColUnaryMinus(col 20:double)(children: DoubleColAddDoubleScalar(col 19:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 19:double) -> 20:double) -> 19:double) -> 20:double) -> 19:double, FuncPowerDoubleToDouble(col 20:double)(children: DoubleColDivideLongColumn(col 18:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 
5:double, col 20:double)(children: DoubleColDivideLongColumn(col 18:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 20:double) -> 18:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 20:double) -> 18:double, DoubleColUnaryMinus(col 20:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 24:double) -> 20:double) -> 24:double) -> 20:double) -> 24:double, DoubleColSubtractDoubleColumn(col 20:double, col 26:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 25:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 25:double) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColAddDoubleScalar(col 25:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 25:double) -> 26:double) -> 25:double) -> 26:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 26:double, col 20:double)(children: DoubleColSubtractDoubleColumn(col 20:double, col 27:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 26:double) -> 20:double) -> 26:double) -> 20:double, DoubleColUnaryMinus(col 26:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColAddDoubleScalar(col 26:double, val -3728.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 26:double) -> 27:double) -> 26:double) -> 27:double) -> 26:double, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 27:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 27:double) -> 20:double) -> 27:double) -> 20:double) -> 27:double, DoubleColDivideLongColumn(col 20:double, col 29:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 26:double)(children: DoubleColDivideLongColumn(col 20:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 20:double) -> 26:double) -> 20:double, IfExprNullCondExpr(col 23:boolean, null, col 28:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 28:bigint) -> 29:bigint) -> 26:double, DoubleColDivideLongColumn(col 8:double, col 9:bigint) -> 20:double, DoubleScalarSubtractDoubleColumn(val 10.175, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 33:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 31:double)(children: 
DoubleColDivideLongColumn(col 30:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 30:double) -> 31:double) -> 30:double, IfExprNullCondExpr(col 29:boolean, null, col 32:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 29:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 32:bigint) -> 33:bigint) -> 31:double) -> 30:double, DoubleColUnaryMinus(col 31:double)(children: DoubleScalarSubtractDoubleColumn(val 10.175, col 34:double)(children: DoubleColDivideLongColumn(col 31:double, col 36:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 34:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 31:double) -> 34:double) -> 31:double, IfExprNullCondExpr(col 33:boolean, null, col 35:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 33:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 35:bigint) -> 36:bigint) -> 34:double) -> 31:double) -> 34:double, DoubleColDivideDoubleScalar(col 37:double, val -563.0)(children: DoubleColUnaryMinus(col 31:double)(children: FuncPowerDoubleToDouble(col 37:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 37:double)(children: DoubleColDivideLongColumn(col 31:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 31:double) -> 37:double) -> 31:double) -> 37:double) -> 31:double) -> 37:double) -> 31:double, FuncPowerDoubleToDouble(col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 40:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 38:double)(children: DoubleColDivideLongColumn(col 37:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 37:double) -> 38:double) -> 37:double, IfExprNullCondExpr(col 36:boolean, null, col 39:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 36:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 39:bigint) -> 40:bigint) -> 38:double) -> 37:double, DoubleColUnaryMinus(col 38:double)(children: DoubleColDivideDoubleScalar(col 41:double, val -563.0)(children: DoubleColUnaryMinus(col 38:double)(children: FuncPowerDoubleToDouble(col 41:double)(children: DoubleColDivideLongColumn(col 38:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 41:double)(children: DoubleColDivideLongColumn(col 38:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double) -> 38:double) -> 41:double, DoubleColDivideDoubleColumn(col 38:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 38:double) -> 42:double, DoubleColDivideDoubleColumn(col 38:double, col 43:double)(children: CastLongToDouble(col 10:tinyint) -> 38:double, DoubleColDivideDoubleScalar(col 44:double, val -563.0)(children: DoubleColUnaryMinus(col 43:double)(children: FuncPowerDoubleToDouble(col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 44:double)(children: DoubleColDivideLongColumn(col 43:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double) -> 43:double) -> 44:double, DoubleColUnaryMinus(col 43:double)(children: 
DoubleColDivideDoubleColumn(col 38:double, col 2:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 38:double) -> 43:double) -> 38:double + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -245,7 +263,7 @@ WHERE ((762 = cbigint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900256E8 4.095728233294762E24 8549.657499338187 -5.481251832900256E8 3.8812872199726474E8 2.12743126884874112E17 3.0054786945575034E17 -5.700752675298234 -3.0054786945575034E17 3.0054786945575034E17 973579.3664121237 5.48222463472403E8 -973579.3664121237 -18.377427808018613 -64 2044 -6.573680812059066E-5 18.377427808018613 +1.6000018929276082E8 1.5999646129276082E8 -1.5999646129276082E8 1.5999646129276082E8 2.5598867626205912E16 -8706342.964000002 -1.6000018929276082E8 5.481251832900251E8 4.095728233294762E24 8549.657499338193 -5.481251832900251E8 3.8812872199726427E8 2.12743126884873664E17 3.0054786945574982E17 -5.700752675298234 -3.0054786945574982E17 3.0054786945574982E17 973579.3664121227 5.482224634724026E8 -973579.3664121227 -18.377427808018613 -64 2044 -6.573680812059072E-5 18.377427808018613 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT MAX(cint), (MAX(cint) / -3728), @@ -344,33 +362,34 @@ STAGE PLANS: predicate: (((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cbigint <= 197L) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28D) and (UDFToDouble(csmallint) > cdouble)) or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean) Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble + expressions: cint (type: int), cbigint (type: bigint), csmallint (type: smallint), cdouble (type: double), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] + projectedOutputColumnNums: [2, 3, 1, 5, 0, 13, 16, 14, 18, 15, 20] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 
1:smallint) -> 15:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 2:int) -> 17:double, CastLongToDouble(col 2:int) -> 19:double) -> 20:double Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint) + aggregations: max(_col0), sum(_col6), sum(_col5), count(_col1), sum(_col8), sum(_col7), count(_col2), max(_col3), sum(_col4), count(_col4), min(_col0), min(_col3), sum(_col10), sum(_col9), count(_col0) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFAvgLong(col 0:tinyint) -> struct, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp + aggregators: VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumLong(col 0:tinyint) -> bigint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: int), _col6 (type: double), _col7 (type: struct), _col8 (type: struct) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 
(type: int), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -382,24 +401,41 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), var_pop(VALUE._col1), stddev_pop(VALUE._col2), max(VALUE._col3), avg(VALUE._col4), min(VALUE._col5), min(VALUE._col6), stddev_samp(VALUE._col7), var_samp(VALUE._col8) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), sum(VALUE._col8), count(VALUE._col9), min(VALUE._col10), min(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), count(VALUE._col14) + Group By Vectorization: + aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFMaxDouble(col 7:double) -> double, VectorUDAFSumLong(col 8:bigint) -> bigint, VectorUDAFCountMerge(col 9:bigint) -> bigint, VectorUDAFMinLong(col 10:int) -> int, VectorUDAFMinDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCountMerge(col 14:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0D) (type: double), (_col0 * -3728) (type: int), _col1 (type: double), (- (_col0 * -3728)) (type: int), _col2 (type: double), (-563 % (_col0 * -3728)) (type: int), (_col1 / _col2) (type: double), (- _col2) (type: double), _col3 (type: double), _col4 (type: double), (_col2 - 10.175D) (type: double), _col5 (type: int), (UDFToDouble((_col0 * -3728)) % (_col2 - 10.175D)) (type: double), (- _col3) (type: double), _col6 (type: double), (_col3 % -26.28D) (type: double), _col7 (type: double), (- (UDFToDouble(_col0) / -3728.0D)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0D) - _col4) (type: double), (- (_col0 * -3728)) (type: int), _col8 (type: double) + expressions: _col0 (type: int), (UDFToDouble(_col0) / -3728.0D) (type: double), (_col0 * -3728) (type: int), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (_col0 * -3728)) (type: 
int), power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5) (type: double), (-563 % (_col0 * -3728)) (type: int), (((_col1 - ((_col2 * _col2) / _col3)) / _col3) / power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5)) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5)) (type: double), _col7 (type: double), (_col8 / _col9) (type: double), (power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5) - 10.175D) (type: double), _col10 (type: int), (UDFToDouble((_col0 * -3728)) % (power(((_col4 - ((_col5 * _col5) / _col6)) / _col6), 0.5) - 10.175D)) (type: double), (- _col7) (type: double), _col11 (type: double), (_col7 % -26.28D) (type: double), power(((_col4 - ((_col5 * _col5) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END), 0.5) (type: double), (- (UDFToDouble(_col0) / -3728.0D)) (type: double), ((- (_col0 * -3728)) % (-563 % (_col0 * -3728))) (type: int), ((UDFToDouble(_col0) / -3728.0D) - (_col8 / _col9)) (type: double), (- (_col0 * -3728)) (type: int), ((_col12 - ((_col13 * _col13) / _col14)) / CASE WHEN ((_col14 = 1L)) THEN (null) ELSE ((_col14 - 1)) END) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 16, 17, 18, 20, 15, 22, 24, 23, 7, 21, 26, 10, 27, 25, 11, 28, 29, 30, 32, 37, 35, 36] + selectExpressions: DoubleColDivideDoubleScalar(col 15:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 15:double) -> 16:double, LongColMultiplyLongScalar(col 0:int, val -3728) -> 17:int, DoubleColDivideLongColumn(col 15:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 18:double)(children: DoubleColDivideLongColumn(col 15:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 15:double) -> 18:double) -> 15:double) -> 18:double, LongColUnaryMinus(col 19:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 19:int) -> 20:int, FuncPowerDoubleToDouble(col 21:double)(children: DoubleColDivideLongColumn(col 15:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 21:double)(children: DoubleColDivideLongColumn(col 15:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double) -> 21:double) -> 15:double) -> 21:double) -> 15:double, LongScalarModuloLongColumn(val -563, col 19:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 19:int) -> 22:int, DoubleColDivideDoubleColumn(col 23:double, col 21:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double, FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 24:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 21:double) -> 24:double) -> 21:double) -> 24:double) -> 21:double) -> 24:double, DoubleColUnaryMinus(col 
21:double)(children: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double, LongColDivideLongColumn(col 8:bigint, col 9:bigint) -> 21:double, DoubleColSubtractDoubleScalar(col 25:double, val 10.175)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 25:double) -> 26:double) -> 25:double) -> 26:double) -> 25:double) -> 26:double, DoubleColModuloDoubleColumn(col 25:double, col 28:double)(children: CastLongToDouble(col 19:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 19:int) -> 25:double, DoubleColSubtractDoubleScalar(col 27:double, val 10.175)(children: FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double) -> 28:double) -> 27:double, DoubleColUnaryMinus(col 7:double) -> 25:double, DoubleColModuloDoubleScalar(col 7:double, val -26.28) -> 28:double, FuncPowerDoubleToDouble(col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 29:double) -> 30:double) -> 29:double, IfExprNullCondExpr(col 19:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 33:double)(children: DoubleColDivideDoubleScalar(col 30:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 30:double) -> 33:double) -> 30:double, LongColModuloLongColumn(col 34:int, col 35:int)(children: LongColUnaryMinus(col 32:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 32:int) -> 34:int, LongScalarModuloLongColumn(val -563, col 32:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 32:int) -> 35:int) -> 32:int, DoubleColSubtractDoubleColumn(col 36:double, col 33:double)(children: DoubleColDivideDoubleScalar(col 33:double, val -3728.0)(children: CastLongToDouble(col 0:int) -> 33:double) -> 36:double, LongColDivideLongColumn(col 8:bigint, col 9:bigint) -> 33:double) -> 37:double, LongColUnaryMinus(col 34:int)(children: LongColMultiplyLongScalar(col 0:int, val -3728) -> 34:int) -> 35:int, DoubleColDivideLongColumn(col 33:double, col 39:bigint)(children: DoubleColSubtractDoubleColumn(col 12:double, col 36:double)(children: DoubleColDivideLongColumn(col 33:double, col 14:bigint)(children: DoubleColMultiplyDoubleColumn(col 13:double, col 13:double) -> 33:double) -> 36:double) -> 33:double, IfExprNullCondExpr(col 34:boolean, null, col 38:bigint)(children: LongColEqualLongScalar(col 
14:bigint, val 1) -> 34:boolean, LongColSubtractLongScalar(col 14:bigint, val 1) -> 38:bigint) -> 39:bigint) -> 36:double + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 420 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -481,7 +517,7 @@ WHERE (((cbigint <= 197) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --20301111 5445.576984978541 -1626869520 7.9684972882908944E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 511 5454.512308361625 1626869520 7.2647256545687792E16 +-20301111 5445.576984978541 -1626869520 7.9684972882908896E16 1626869520 NULL -563 NULL NULL NULL -8.935323383084578 NULL -1069736047 NULL NULL NULL NULL NULL -5445.576984978541 -58 5454.512308361625 1626869520 7.2647256545687872E16 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT VAR_POP(cbigint), (-(VAR_POP(cbigint))), @@ -578,33 +614,34 @@ STAGE PLANS: predicate: (((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a')) or (cfloat = 762) or (cstring1 = 'ss') or (ctimestamp1 = ctimestamp2)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble + expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 5] + projectedOutputColumnNums: [3, 0, 1, 2, 5, 13, 16, 14, 18, 15] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 15:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 15:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint) + aggregations: sum(_col6), sum(_col5), count(_col0), count(), max(_col1), sum(_col8), sum(_col7), count(_col2), max(_col3), sum(_col9), sum(_col4), count(_col4), count(_col1), sum(_col1) Group By Vectorization: - 
aggregators: VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_pop, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFAvgLong(col 0:tinyint) -> struct + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumLong(col 0:tinyint) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: tinyint), _col3 (type: struct), _col4 (type: int), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: int), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -616,24 +653,41 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), count(VALUE._col1), max(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), stddev_samp(VALUE._col5), count(VALUE._col6), avg(VALUE._col7) + aggregations: 
sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3), max(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), count(VALUE._col12), sum(VALUE._col13) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFMaxLong(col 4:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxLong(col 8:int) -> int, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFCountMerge(col 12:bigint) -> bigint, VectorUDAFSumLong(col 13:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (- _col0) (type: double), (_col0 - (- _col0)) (type: double), _col1 (type: bigint), (CAST( _col1 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col2 (type: tinyint), (UDFToDouble(_col1) - (- _col0)) (type: double), (- (- _col0)) (type: double), (-1.0D % (- _col0)) (type: double), _col1 (type: bigint), (- _col1) (type: bigint), _col3 (type: double), (- (- (- _col0))) (type: double), (762L * (- _col1)) (type: bigint), _col4 (type: int), (UDFToLong(_col2) + (762L * (- _col1))) (type: bigint), ((- _col0) + UDFToDouble(_col4)) (type: double), _col5 (type: double), ((- _col1) % _col1) (type: bigint), _col6 (type: bigint), _col7 (type: double), (-3728L % (UDFToLong(_col2) + (762L * (- _col1)))) (type: bigint) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2)) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) - (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), _col3 (type: bigint), (CAST( _col3 AS decimal(19,0)) % 79.553) (type: decimal(5,3)), _col4 (type: tinyint), (UDFToDouble(_col3) - (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), (- (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), (-1.0D % (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2))) (type: double), _col3 (type: bigint), (- _col3) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col7)) / _col7), 0.5) (type: double), (- (- (- ((_col0 - ((_col1 * _col1) / _col2)) / _col2)))) (type: double), (762L * (- _col3)) (type: bigint), _col8 (type: int), (UDFToLong(_col4) + (762L * (- _col3))) (type: bigint), ((- ((_col0 - ((_col1 * _col1) / _col2)) / _col2)) + UDFToDouble(_col8)) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END), 0.5) (type: double), ((- _col3) % _col3) (type: bigint), _col12 (type: bigint), (_col13 / _col12) (type: double), (-3728L % (UDFToLong(_col4) + (762L * (- _col3)))) (type: 
bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [15, 14, 18, 3, 20, 4, 21, 17, 22, 3, 23, 16, 24, 27, 8, 26, 30, 25, 33, 12, 29, 34] + selectExpressions: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 15:double) -> 14:double) -> 15:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 16:double)(children: DoubleColDivideLongColumn(col 14:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 14:double) -> 16:double) -> 14:double) -> 16:double) -> 14:double, DoubleColSubtractDoubleColumn(col 17:double, col 16:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 17:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 18:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double) -> 16:double) -> 18:double, DecimalColModuloDecimalScalar(col 19:decimal(19,0), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 19:decimal(19,0)) -> 20:decimal(5,3), DoubleColSubtractDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideLongColumn(col 17:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 21:double)(children: DoubleColDivideLongColumn(col 17:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 17:double) -> 21:double) -> 17:double) -> 21:double) -> 17:double) -> 21:double, DoubleColUnaryMinus(col 16:double)(children: DoubleColUnaryMinus(col 17:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 17:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 17:double) -> 16:double) -> 17:double) -> 16:double) -> 17:double, DoubleScalarModuloDoubleColumn(val -1.0, col 16:double)(children: DoubleColUnaryMinus(col 22:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 22:double)(children: DoubleColDivideLongColumn(col 16:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 16:double) -> 22:double) -> 16:double) -> 22:double) -> 16:double) -> 22:double, LongColUnaryMinus(col 3:bigint) -> 23:bigint, FuncPowerDoubleToDouble(col 
24:double)(children: DoubleColDivideLongColumn(col 16:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 24:double)(children: DoubleColDivideLongColumn(col 16:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 16:double) -> 24:double) -> 16:double) -> 24:double) -> 16:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColUnaryMinus(col 24:double)(children: DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double, LongScalarMultiplyLongColumn(val 762, col 26:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 26:bigint) -> 27:bigint, LongColAddLongColumn(col 4:bigint, col 28:bigint)(children: col 4:tinyint, LongScalarMultiplyLongColumn(val 762, col 26:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 26:bigint) -> 28:bigint) -> 26:bigint, DoubleColAddDoubleColumn(col 25:double, col 29:double)(children: DoubleColUnaryMinus(col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 2:bigint)(children: DoubleColSubtractDoubleColumn(col 0:double, col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 2:bigint)(children: DoubleColMultiplyDoubleColumn(col 1:double, col 1:double) -> 25:double) -> 29:double) -> 25:double) -> 29:double) -> 25:double, CastLongToDouble(col 8:int) -> 29:double) -> 30:double, FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 29:double)(children: DoubleColDivideLongColumn(col 25:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 25:double) -> 29:double) -> 25:double, IfExprNullCondExpr(col 28:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 29:double) -> 25:double, LongColModuloLongColumn(col 32:bigint, col 3:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 32:bigint) -> 33:bigint, LongColDivideLongColumn(col 13:bigint, col 12:bigint) -> 29:double, LongScalarModuloLongColumn(val -3728, col 32:bigint)(children: LongColAddLongColumn(col 4:bigint, col 34:bigint)(children: col 4:tinyint, LongScalarMultiplyLongColumn(val 762, col 32:bigint)(children: LongColUnaryMinus(col 3:bigint) -> 32:bigint) -> 34:bigint) -> 32:bigint) -> 34:bigint + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -713,7 +767,7 @@ WHERE ((ctimestamp1 = ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -2.5109214708345636E18 -2.5109214708345636E18 5.0218429416691272E18 2780 75.198 62 2.5109214708345661E18 2.5109214708345636E18 -1.0 2780 -2780 9460.675803068349 
-2.5109214708345636E18 -2118360 1072872630 -2118298 -2.5109214697616911E18 185935.34910862707 0 758 -1.733509234828496 -3728 +2.5109214708344376E18 -2.5109214708344376E18 5.0218429416688753E18 2780 75.198 62 2.5109214708344402E18 2.5109214708344376E18 -1.0 2780 -2780 9460.675803068356 -2.5109214708344376E18 -2118360 1072872630 -2118298 -2.5109214697615652E18 185935.34910862715 0 758 -1.733509234828496 -3728
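Note: the rewritten Select expressions in the plan above are the standard sum/count decomposition of the variance family. The identities the rule applies (with n = count(x) over non-NULL x, and the CASE WHEN guard reproducing Hive's NULL result for the sample variants when n = 1) are, in LaTeX:

    \mathrm{var\_pop}(x) = \frac{\sum x^2 - (\sum x)^2 / n}{n}
    \qquad
    \mathrm{var\_samp}(x) = \begin{cases} \mathrm{NULL} & n = 1 \\ \frac{\sum x^2 - (\sum x)^2 / n}{n - 1} & n > 1 \end{cases}

    \mathrm{stddev\_pop}(x) = \mathrm{var\_pop}(x)^{1/2}
    \qquad
    \mathrm{stddev\_samp}(x) = \mathrm{var\_samp}(x)^{1/2}
    \qquad
    \mathrm{avg}(x) = \frac{\sum x}{n}

In the plan, ((_col0 - ((_col1 * _col1) / _col2)) / _col2) is var_pop(cbigint) with _col0 = sum(x*x), _col1 = sum(x), _col2 = count(x); power(..., 0.5) supplies the square root for the stddev variants; and (_col13 / _col12) is avg(ctinyint). The small drift in the last digits of the result row reflects the changed order of floating-point operations, not a semantic change.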
WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT AVG(ctinyint), @@ -791,33 +845,34 @@ STAGE PLANS: predicate: (((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0D)) or ((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and (cstring1 >= 'ss')) or (cfloat = 17)) (type: boolean) Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint), cfloat (type: float) - outputColumnNames: ctinyint, cint, cbigint, cfloat + expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 3, 4] + projectedOutputColumnNums: [0, 3, 2, 4, 13, 16, 14, 18] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double, CastLongToDouble(col 3:bigint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 15:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat) + aggregations: sum(_col0), count(_col0), max(_col1), sum(_col5), sum(_col4), count(_col2), sum(_col7), sum(_col6), count(_col1), max(_col3) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 0:tinyint) -> struct, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_pop, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFMaxDouble(col 4:float) -> float + aggregators: VectorUDAFSumLong(col 0:tinyint) -> bigint, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: bigint), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: float) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: float) Execution mode: vectorized Map Vectorization: enabled: true @@ -829,24 +884,41 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), max(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_pop(VALUE._col4), max(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), max(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), max(VALUE._col9) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxLong(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountMerge(col 8:bigint) -> bigint, VectorUDAFMaxDouble(col 9:float) -> float + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 + 6981.0D) (type: double), ((_col0 + 6981.0D) + _col0) (type: double), _col1 (type: bigint), (((_col0 + 6981.0D) + _col0) / _col0) (type: double), (- (_col0 + 6981.0D)) (type: double), _col2 (type: double), (_col0 % (- (_col0 + 6981.0D))) (type: double), _col3 (type: double), _col4 (type: double), (- _col1) (type: bigint), (UDFToDouble((- _col1)) / _col2) (type: double), _col5
(type: float), (_col4 * -26.28D) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) + 6981.0D) (type: double), (((_col0 / _col1) + 6981.0D) + (_col0 / _col1)) (type: double), _col2 (type: bigint), ((((_col0 / _col1) + 6981.0D) + (_col0 / _col1)) / (_col0 / _col1)) (type: double), (- ((_col0 / _col1) + 6981.0D)) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END), 0.5) (type: double), ((_col0 / _col1) % (- ((_col0 / _col1) + 6981.0D))) (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), ((_col6 - ((_col7 * _col7) / _col8)) / _col8) (type: double), (- _col2) (type: bigint), (UDFToDouble((- _col2)) / power(((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END), 0.5)) (type: double), _col9 (type: float), (((_col6 - ((_col7 * _col7) / _col8)) / _col8) * -26.28D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [10, 12, 14, 2, 13, 11, 15, 21, 20, 22, 19, 25, 9, 16] + selectExpressions: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 10:double, DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 12:double, DoubleColAddDoubleColumn(col 13:double, col 11:double)(children: DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 14:double, DoubleColDivideDoubleColumn(col 15:double, col 11:double)(children: DoubleColAddDoubleColumn(col 13:double, col 11:double)(children: DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 15:double, LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 13:double, DoubleColUnaryMinus(col 15:double)(children: DoubleColAddDoubleScalar(col 11:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 11:double) -> 15:double) -> 11:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 15:double) -> 16:double) -> 15:double, IfExprNullCondExpr(col 17:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 16:double) -> 15:double, DoubleColModuloDoubleColumn(col 16:double, col 20:double)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 16:double, DoubleColUnaryMinus(col 21:double)(children: DoubleColAddDoubleScalar(col 20:double, val 6981.0)(children: LongColDivideLongColumn(col 0:bigint, col 1:bigint) -> 20:double) -> 21:double) -> 20:double) -> 21:double, DoubleColDivideLongColumn(col 16:double, col 5:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 20:double)(children: DoubleColDivideLongColumn(col 
16:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 16:double) -> 20:double) -> 16:double) -> 20:double, DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 22:double)(children: DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 16:double) -> 22:double) -> 16:double) -> 22:double, LongColUnaryMinus(col 2:bigint) -> 19:bigint, DoubleColDivideDoubleColumn(col 16:double, col 24:double)(children: CastLongToDouble(col 23:bigint)(children: LongColUnaryMinus(col 2:bigint) -> 23:bigint) -> 16:double, FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 24:double) -> 25:double) -> 24:double, IfExprNullCondExpr(col 23:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 23:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 25:double) -> 24:double) -> 25:double, DoubleColMultiplyDoubleScalar(col 24:double, val -26.28)(children: DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 6:double, col 24:double)(children: DoubleColDivideLongColumn(col 16:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 16:double) -> 24:double) -> 16:double) -> 24:double) -> 16:double + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 328 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -907,7 +979,7 @@ WHERE (((ctimestamp2 <= ctimestamp1) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222071937E8 -0.5934409161894847 2.5099887741860852E16 1.52140608502098816E18 -2141851355 -13.510823917813237 79.553 -3.998255191435157E19 +-0.5934409161894847 6980.406559083811 6979.813118167622 2141851355 -11761.597368421053 -6980.406559083811 1.5852855222070777E8 -0.5934409161894847 2.5099887741857176E16 1.52140608502098611E18 -2141851355 -13.510823917814225 79.553 -3.998255191435152E19
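Note: the reduction is easy to reproduce by hand. A HiveQL sketch of what the rewritten plan above computes for var_pop(cint) and stddev_samp(cint) over alltypesorc (illustrative only, not the rule's literal output; the explicit DOUBLE casts mirror the plan's CastLongToDouble expressions):

    SELECT
      -- var_pop: (sum(x*x) - sum(x)^2/n) / n
      (SUM(CAST(cint AS DOUBLE) * CAST(cint AS DOUBLE))
         - (SUM(CAST(cint AS DOUBLE)) * SUM(CAST(cint AS DOUBLE))) / COUNT(cint))
        / COUNT(cint) AS var_pop_cint,
      -- stddev_samp: sqrt of the sample variance, NULL when n = 1
      POWER(
        (SUM(CAST(cint AS DOUBLE) * CAST(cint AS DOUBLE))
           - (SUM(CAST(cint AS DOUBLE)) * SUM(CAST(cint AS DOUBLE))) / COUNT(cint))
        / CASE WHEN COUNT(cint) = 1 THEN NULL ELSE COUNT(cint) - 1 END,
        0.5) AS stddev_samp_cint
    FROM alltypesorc;

Both outputs reuse the same three partials, sum(x*x), sum(x) and count(x); in the plan they surface once as _col3, _col4, _col5 and feed both the power(...) and the var_pop expression, which is what lets the hash-side Group By drop the struct-valued variance accumulators in favor of plain sums and counts.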
PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT cint, cdouble, @@ -2097,26 +2169,27 @@ STAGE PLANS: predicate: (((UDFToInteger(csmallint) = -6432) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint))) and (UDFToInteger(csmallint) >= -257)) (type: boolean) Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint) - outputColumnNames: ctinyint, csmallint, cbigint + expressions: csmallint (type: smallint), cbigint (type: bigint), ctinyint (type: tinyint), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 3] + projectedOutputColumnNums: [1, 3, 0, 13, 16, 14, 18] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 1:smallint) -> 14:double, CastLongToDouble(col 1:smallint) -> 15:double) -> 16:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count() + aggregations: sum(_col4), sum(_col3), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count() Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 1:smallint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] - keys: csmallint (type: smallint) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] + keys: _col0 (type: smallint) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint) @@ -2127,7 +2200,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: struct), _col4 (type: bigint) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -2139,25 +2212,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator - vectorized: false + allNative:
false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), count(VALUE._col7) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFSumLong(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFCountMerge(col 8:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:smallint + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] keys: KEY._col0 (type: smallint) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010L) (type: bigint) + expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col4 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col4)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), ((_col5 - ((_col6 * _col6) / _col7)) / _col7) (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col8 (type: bigint), (_col8 - -89010L) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 9, 10, 16, 4, 18, 19, 17, 14, 8, 20] + selectExpressions: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 9:int, FuncPowerDoubleToDouble(col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 14:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 10:double) -> 11:double) -> 10:double, IfExprNullCondExpr(col 12:boolean, null, col 13:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 12:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 13:bigint) -> 14:bigint) -> 11:double) -> 10:double, DecimalScalarDivideDecimalColumn(val -1.389, col 15:decimal(5,0))(children: CastLongToDecimal(col 0:smallint) -> 15:decimal(5,0)) -> 16:decimal(10,9), DoubleColDivideDoubleColumn(col 11:double, col 17:double)(children: CastLongToDouble(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 11:double, 
CastLongToDouble(col 4:bigint) -> 17:double) -> 18:double, LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 19:int, DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 17:double)(children: DoubleColDivideLongColumn(col 11:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 11:double) -> 17:double) -> 11:double) -> 17:double, LongColUnaryMinus(col 20:int)(children: LongColUnaryMinus(col 14:int)(children: LongColModuloLongScalar(col 0:int, val -75)(children: col 0:smallint) -> 14:int) -> 20:int) -> 14:int, LongColSubtractLongScalar(col 8:bigint, val -89010) -> 20:bigint Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint) sort order: +++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -2259,7 +2351,7 @@ POSTHOOK: Input: default@alltypesorc -42 -42 NULL 0.033071429 NULL NULL 42 0.0 -42 1 89011 -49 -49 NULL 0.028346939 NULL NULL 49 0.0 -49 1 89011 -62 -62 NULL 0.022403226 NULL NULL 62 0.0 -62 1 89011 --75 0 0.0 0.018520000 NULL NULL 0 107.55555555555556 0 3 89013 +-75 0 0.0 0.018520000 NULL NULL 0 107.55555555555554 0 3 89013 -77 -2 NULL 0.018038961 NULL NULL 2 0.0 -2 1 89011 -84 -9 NULL 0.016535714 NULL NULL 9 0.0 -9 1 89011 -89 -14 NULL 0.015606742 NULL NULL 14 0.0 -14 1 89011
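Note: the single-row groups in the result block above show the semantics the CASE WHEN guard preserves: with count(csmallint) = 1 the rewritten stddev_samp evaluates to NULL while var_pop evaluates to 0.0 (rows such as -42 -42 NULL ... 0.0 -42 1 89011). A hand-written analogue of the group-by rewrite, again a sketch rather than the rule's literal output:

    SELECT csmallint,
           -- sample stddev from sum/sum-of-squares/count partials;
           -- the CASE guard yields NULL for single-row groups
           POWER(
             (SUM(CAST(csmallint AS DOUBLE) * CAST(csmallint AS DOUBLE))
                - (SUM(CAST(csmallint AS DOUBLE)) * SUM(CAST(csmallint AS DOUBLE))) / COUNT(csmallint))
             / CASE WHEN COUNT(csmallint) = 1 THEN NULL ELSE COUNT(csmallint) - 1 END,
             0.5) AS stddev_samp_csmallint,
           COUNT(csmallint) AS cnt
    FROM alltypesorc
    GROUP BY csmallint;

The rewrite is also what makes the reducers vectorizable: the merge side no longer needs the FINAL-mode variance evaluators over struct partials that the removed notVectorizedReason lines complained about, only sum and count merges (VectorUDAFSumDouble, VectorUDAFCountMerge), which have native vector implementations.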
@@ -2352,26 +2444,27 @@ STAGE PLANS: predicate: ((((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (CAST( ctinyint AS decimal(6,2)) = 2563.58) or ((cdouble <= UDFToDouble(cbigint)) and (CAST( cbigint AS decimal(21,2)) < -5638.15))) and (cdouble > 2563.58D)) (type: boolean) Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cfloat (type: float), cdouble (type: double) - outputColumnNames: cfloat, cdouble + expressions: cdouble (type: double), cfloat (type: float), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [4, 5] + projectedOutputColumnNums: [5, 4, 14] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble) + aggregations: sum(_col2), sum(_col0), count(_col0), count(_col1), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFSumDouble(col 5:double) -> double + aggregators: VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cdouble (type: double) + projectedOutputColumnNums: [0, 1, 2, 3, 4] + keys: _col0 (type: double) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) @@ -2382,7 +2475,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct), _col5 (type: struct), _col6 (type: double) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -2394,25 +2487,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: var_samp(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), var_pop(VALUE._col3), stddev_pop(VALUE._col4), sum(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3), sum(VALUE._col4) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDouble(col 5:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:double + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4] keys: KEY._col0 (type: double) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), _col1 (type: double), (2563.58D * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58D * _col1) + -5638.15D) (type: double), ((- _col1) * ((2563.58D * _col1) + -5638.15D)) (type: double), _col3 (type: double), _col4 (type:
double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0D) (type: double), _col6 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) + expressions: _col0 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double), (2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), _col4 (type: bigint), ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D) (type: double), ((- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) * ((2563.58D * ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) + -5638.15D)) (type: double), _col5 (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (_col0 - (- ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END))) (type: double), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (_col0 + ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END)) (type: double), (_col0 * 762.0D) (type: double), _col2 (type: double), (-863.257D % (_col0 * 762.0D)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 7, 6, 11, 4, 17, 20, 5, 23, 26, 14, 29, 30, 2, 34] + selectExpressions: DoubleColDivideLongColumn(col 6:double, col 10:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 7:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 7:double) -> 6:double, IfExprNullCondExpr(col 8:boolean, null, col 9:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 9:bigint) -> 10:bigint) -> 7:double, DoubleScalarMultiplyDoubleColumn(val 2563.58, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 11:double)(children: DoubleColDivideLongColumn(col 6:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 6:double) -> 11:double) -> 6:double, IfExprNullCondExpr(col 10:boolean, null, col 12:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 10:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 12:bigint) -> 13:bigint) -> 11:double) -> 6:double, DoubleColUnaryMinus(col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 16:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 14:double)(children: DoubleColDivideLongColumn(col 11:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 11:double) -> 14:double) -> 11:double, IfExprNullCondExpr(col 13:boolean, null, col 15:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 15:bigint) -> 16:bigint) -> 14:double) -> 11:double, DoubleColAddDoubleScalar(col 14:double, val 
-5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 17:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 17:double) -> 14:double, IfExprNullCondExpr(col 16:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 16:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 17:double) -> 14:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 14:double, col 23:double)(children: DoubleColUnaryMinus(col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 20:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 20:double) -> 14:double, IfExprNullCondExpr(col 19:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 19:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 20:double) -> 14:double, DoubleColAddDoubleScalar(col 20:double, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 25:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 20:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 20:double) -> 23:double) -> 20:double, IfExprNullCondExpr(col 22:boolean, null, col 24:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 24:bigint) -> 25:bigint) -> 23:double) -> 20:double) -> 23:double) -> 20:double, DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 23:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 23:double) -> 14:double) -> 23:double, DoubleColSubtractDoubleColumn(col 0:double, col 14:double)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 26:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 26:double) -> 14:double, IfExprNullCondExpr(col 25:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 14:double) -> 26:double, FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 29:double)(children: DoubleColDivideLongColumn(col 14:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 14:double) -> 29:double) -> 14:double) -> 29:double) -> 14:double, DoubleColAddDoubleColumn(col 0:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, 
col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 29:double) -> 30:double) -> 29:double, IfExprNullCondExpr(col 28:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 28:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 30:double) -> 29:double, DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 30:double, DoubleScalarModuloDoubleColumn(val -863.257, col 33:double)(children: DoubleColMultiplyDoubleScalar(col 0:double, val 762.0) -> 33:double) -> 34:double Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double) Reducer 3 @@ -2651,26 +2763,27 @@ STAGE PLANS: predicate: ((((UDFToInteger(ctinyint) <> -257) and cboolean2 is not null and cstring1 regexp '.*ss' and (UDFToDouble(ctimestamp1) > -3.0D)) or (UDFToDouble(ctimestamp2) = -5.0D) or ((UDFToDouble(ctimestamp1) < 0.0D) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint)))) and (UDFToDouble(ctimestamp1) <> 0.0D)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble, cstring1, ctimestamp1 + expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4, 5, 6, 8] + projectedOutputColumnNums: [8, 6, 2, 1, 0, 4, 5, 13, 16, 14, 18, 4, 15, 17, 21] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 
17:double)(children: CastLongToDouble(col 1:smallint) -> 15:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 0:tinyint) -> 19:double, CastLongToDouble(col 0:tinyint) -> 20:double) -> 21:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint) + aggregations: sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), count(), min(_col4), sum(_col10), sum(_col9), sum(_col12), sum(_col11), count(_col5), sum(_col2), sum(_col5), min(_col6), sum(_col14), sum(_col13), count(_col4) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop, VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: var_samp, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 4:float) -> struct, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFSumLong(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 8:timestamp, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - keys: ctimestamp1 (type: timestamp), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + keys: _col0 (type: timestamp), _col1 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) @@ -2681,7 +2794,7 @@ STAGE PLANS: native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: double), _col12 (type: struct), _col13 (type: struct), _col14 (type: bigint) + value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: tinyint), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -2693,25 +2806,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: stddev_pop(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), min(VALUE._col3), var_samp(VALUE._col4), var_pop(VALUE._col5), avg(VALUE._col6), var_samp(VALUE._col7), avg(VALUE._col8), min(VALUE._col9), var_pop(VALUE._col10), stddev_pop(VALUE._col11), sum(VALUE._col12) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), count(VALUE._col5), min(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), min(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), count(VALUE._col17) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumLong(col 5:bigint) -> bigint, VectorUDAFCountMerge(col 6:bigint) -> bigint, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMinLong(col 8:tinyint) -> tinyint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint, VectorUDAFSumLong(col 14:bigint) -> bigint, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFMinDouble(col 16:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCountMerge(col 19:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:timestamp, col 1:string + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: 
mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175D) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28D - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28D - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175D)) (type: double), _col6 (type: double), (_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175D / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175D))) (type: double), _col10 (type: double), (((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175D) (type: double), (10.175D % (10.175D / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28D - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28D - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28D) (type: double) + expressions: _col0 (type: timestamp), _col1 (type: string), power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) (type: double), (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D) (type: double), (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), (_col5 / _col6) (type: double), (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), (-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), _col7 (type: bigint), (- _col7) (type: bigint), ((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) (type: double), _col8 (type: tinyint), (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7))) (type: double), (- (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D)) (type: double), ((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) (type: double), (((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) (type: double), (- (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) 
(type: double), (UDFToDouble((- _col7)) / power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (10.175D / (_col5 / _col6)) (type: double), (_col14 / _col4) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END) (type: double), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) - (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) (type: double), (- (- (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D))) (type: double), (_col15 / _col13) (type: double), (((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) - (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) * 10.175D) (type: double), (10.175D % (10.175D / (_col5 / _col6))) (type: double), (- _col8) (type: tinyint), _col16 (type: double), ((_col9 - ((_col10 * _col10) / _col6)) / _col6) (type: double), (- ((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)))) (type: double), ((- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) % (_col15 / _col13)) (type: double), (-26.28 / CAST( (- _col8) AS decimal(3,0))) (type: decimal(8,6)), power(((_col17 - ((_col18 * _col18) / _col19)) / _col19), 0.5) (type: double), _col14 (type: bigint), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) / ((_col11 - ((_col12 * _col12) / _col13)) / _col13)) (type: double), (- (- _col7)) (type: bigint), _col7 (type: bigint), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) % -26.28D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 20, 22, 23, 21, 25, 26, 7, 27, 24, 8, 30, 28, 32, 29, 35, 40, 39, 41, 38, 43, 46, 49, 42, 50, 51, 53, 16, 55, 56, 58, 61, 54, 14, 62, 67, 7, 65] + selectExpressions: FuncPowerDoubleToDouble(col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 4:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 20:double) -> 21:double) -> 20:double, DoubleColMultiplyDoubleScalar(col 21:double, val 10.175)(children: FuncPowerDoubleToDouble(col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 22:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 21:double) -> 22:double) -> 21:double) -> 22:double) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 21:double)(children: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 23:double)(children: DoubleColDivideLongColumn(col 21:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double) -> 21:double) -> 23:double, LongColDivideLongColumn(col 5:bigint, col 6:bigint) -> 21:double, DoubleColUnaryMinus(col 24:double)(children: FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double) -> 24:double) -> 25:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 24:double)(children: FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 26:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 26:double) -> 24:double) -> 26:double) -> 24:double) -> 26:double, LongColUnaryMinus(col 7:bigint) -> 27:bigint, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 24:double)(children: FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 28:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 28:double) -> 24:double) -> 28:double) -> 24:double) -> 28:double, DoubleColUnaryMinus(col 24:double)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 29:double)(children: DoubleColDivideLongColumn(col 24:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 24:double) -> 29:double) -> 24:double) -> 29:double) -> 24:double) -> 29:double) -> 24:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: DoubleColMultiplyDoubleColumn(col 29:double, col 30:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 28:double)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 28:double) -> 29:double) -> 
28:double) -> 29:double) -> 28:double) -> 29:double, DoubleColUnaryMinus(col 28:double)(children: FuncPowerDoubleToDouble(col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 30:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double) -> 30:double) -> 28:double, CastLongToDouble(col 31:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 31:bigint) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 29:double)(children: DoubleColMultiplyDoubleScalar(col 28:double, val 10.175)(children: FuncPowerDoubleToDouble(col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 29:double)(children: DoubleColDivideLongColumn(col 28:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double) -> 28:double) -> 29:double) -> 28:double, DoubleColDivideLongColumn(col 29:double, col 34:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 32:double)(children: DoubleColDivideLongColumn(col 29:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 32:double) -> 29:double, IfExprNullCondExpr(col 31:boolean, null, col 33:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 31:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 33:bigint) -> 34:bigint) -> 32:double, DoubleColAddDoubleColumn(col 35:double, col 39:double)(children: DoubleColDivideLongColumn(col 29:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 35:double)(children: DoubleColDivideLongColumn(col 29:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 35:double) -> 29:double, IfExprNullCondExpr(col 34:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 34:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double, DoubleColMultiplyDoubleColumn(col 29:double, col 38:double)(children: DoubleColMultiplyDoubleColumn(col 38:double, col 39:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 29:double)(children: FuncPowerDoubleToDouble(col 38:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 38:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 29:double) -> 38:double) -> 29:double) -> 38:double) -> 29:double) -> 38:double, DoubleColUnaryMinus(col 29:double)(children: FuncPowerDoubleToDouble(col 39:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 39:double)(children: DoubleColDivideLongColumn(col 29:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 29:double) -> 39:double) -> 29:double) -> 39:double) -> 29:double) -> 39:double) -> 29:double, CastLongToDouble(col 37:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 37:bigint) -> 38:double) -> 39:double) -> 29:double, DoubleColUnaryMinus(col 38:double)(children: DoubleColUnaryMinus(col 35:double)(children: FuncPowerDoubleToDouble(col 38:double)(children: 
DoubleColDivideLongColumn(col 35:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 38:double)(children: DoubleColDivideLongColumn(col 35:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 35:double) -> 38:double) -> 35:double) -> 38:double) -> 35:double) -> 38:double) -> 35:double, DoubleColDivideDoubleColumn(col 38:double, col 39:double)(children: CastLongToDouble(col 37:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 37:bigint) -> 38:double, FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 39:double) -> 40:double) -> 39:double) -> 40:double) -> 39:double) -> 40:double, DoubleColDivideLongColumn(col 38:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 38:double) -> 39:double) -> 38:double) -> 39:double, DoubleScalarDivideDoubleColumn(val 10.175, col 38:double)(children: LongColDivideLongColumn(col 5:bigint, col 6:bigint) -> 38:double) -> 41:double, LongColDivideLongColumn(col 14:bigint, col 4:bigint) -> 38:double, DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 43:double)(children: DoubleColDivideLongColumn(col 42:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 42:double) -> 43:double) -> 42:double, IfExprNullCondExpr(col 37:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 13:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 13:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 43:double, DoubleColSubtractDoubleColumn(col 42:double, col 50:double)(children: DoubleColAddDoubleColumn(col 46:double, col 50:double)(children: DoubleColDivideLongColumn(col 42:double, col 48:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 42:double) -> 46:double) -> 42:double, IfExprNullCondExpr(col 45:boolean, null, col 47:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 45:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 47:bigint) -> 48:bigint) -> 46:double, DoubleColMultiplyDoubleColumn(col 42:double, col 49:double)(children: DoubleColMultiplyDoubleColumn(col 49:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 42:double)(children: FuncPowerDoubleToDouble(col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double, DoubleColUnaryMinus(col 42:double)(children: FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 50:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 
3:double, col 3:double) -> 42:double) -> 50:double) -> 42:double) -> 50:double) -> 42:double) -> 50:double) -> 42:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 49:double) -> 50:double) -> 42:double, DoubleColMultiplyDoubleColumn(col 46:double, col 49:double)(children: DoubleColMultiplyDoubleColumn(col 49:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 46:double)(children: FuncPowerDoubleToDouble(col 49:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 49:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 46:double) -> 49:double) -> 46:double) -> 49:double) -> 46:double) -> 49:double, DoubleColUnaryMinus(col 46:double)(children: FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 50:double)(children: DoubleColDivideLongColumn(col 46:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 46:double) -> 50:double) -> 46:double) -> 50:double) -> 46:double) -> 50:double) -> 46:double, CastLongToDouble(col 48:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 48:bigint) -> 49:double) -> 50:double) -> 46:double, DoubleColUnaryMinus(col 42:double)(children: DoubleColUnaryMinus(col 49:double)(children: DoubleColMultiplyDoubleScalar(col 42:double, val 10.175)(children: FuncPowerDoubleToDouble(col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 49:double)(children: DoubleColDivideLongColumn(col 42:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double) -> 42:double) -> 49:double, DoubleColDivideLongColumn(col 15:double, col 13:bigint) -> 42:double, DoubleColMultiplyDoubleScalar(col 51:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 50:double, col 55:double)(children: DoubleColAddDoubleColumn(col 51:double, col 55:double)(children: DoubleColDivideLongColumn(col 50:double, col 53:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 51:double)(children: DoubleColDivideLongColumn(col 50:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 50:double) -> 51:double) -> 50:double, IfExprNullCondExpr(col 48:boolean, null, col 52:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 48:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 52:bigint) -> 53:bigint) -> 51:double, DoubleColMultiplyDoubleColumn(col 50:double, col 54:double)(children: DoubleColMultiplyDoubleColumn(col 54:double, col 55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 50:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 54:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 50:double) -> 54:double) -> 50:double) -> 54:double) -> 50:double) -> 54:double, DoubleColUnaryMinus(col 50:double)(children: FuncPowerDoubleToDouble(col 55:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: 
DoubleColSubtractDoubleColumn(col 2:double, col 55:double)(children: DoubleColDivideLongColumn(col 50:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 50:double) -> 55:double) -> 50:double) -> 55:double) -> 50:double) -> 55:double) -> 50:double, CastLongToDouble(col 53:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 53:bigint) -> 54:double) -> 55:double) -> 50:double, DoubleColMultiplyDoubleColumn(col 51:double, col 54:double)(children: DoubleColMultiplyDoubleColumn(col 54:double, col 55:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 51:double)(children: FuncPowerDoubleToDouble(col 54:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 54:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 51:double) -> 54:double) -> 51:double) -> 54:double) -> 51:double) -> 54:double, DoubleColUnaryMinus(col 51:double)(children: FuncPowerDoubleToDouble(col 55:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 55:double)(children: DoubleColDivideLongColumn(col 51:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 51:double) -> 55:double) -> 51:double) -> 55:double) -> 51:double) -> 55:double) -> 51:double, CastLongToDouble(col 53:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 53:bigint) -> 54:double) -> 55:double) -> 51:double) -> 50:double, DoubleScalarModuloDoubleColumn(val 10.175, col 54:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 51:double)(children: LongColDivideLongColumn(col 5:bigint, col 6:bigint) -> 51:double) -> 54:double) -> 51:double, LongColUnaryMinus(col 8:tinyint) -> 53:tinyint, DoubleColDivideLongColumn(col 54:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 55:double)(children: DoubleColDivideLongColumn(col 54:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 54:double) -> 55:double) -> 54:double) -> 55:double, DoubleColUnaryMinus(col 54:double)(children: DoubleColMultiplyDoubleColumn(col 56:double, col 57:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 54:double)(children: FuncPowerDoubleToDouble(col 56:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 56:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 54:double) -> 56:double) -> 54:double) -> 56:double) -> 54:double) -> 56:double, DoubleColUnaryMinus(col 54:double)(children: FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double) -> 56:double, DoubleColModuloDoubleColumn(col 57:double, col 54:double)(children: DoubleColUnaryMinus(col 54:double)(children: FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 57:double)(children: 
DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double, DoubleColDivideLongColumn(col 15:double, col 13:bigint) -> 54:double) -> 58:double, DecimalScalarDivideDecimalColumn(val -26.28, col 60:decimal(3,0))(children: CastLongToDecimal(col 59:tinyint)(children: LongColUnaryMinus(col 8:tinyint) -> 59:tinyint) -> 60:decimal(3,0)) -> 61:decimal(8,6), FuncPowerDoubleToDouble(col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 17:double, col 57:double)(children: DoubleColDivideLongColumn(col 54:double, col 19:bigint)(children: DoubleColMultiplyDoubleColumn(col 18:double, col 18:double) -> 54:double) -> 57:double) -> 54:double) -> 57:double) -> 54:double, DoubleColDivideDoubleColumn(col 57:double, col 65:double)(children: DoubleColAddDoubleColumn(col 62:double, col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 64:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 62:double)(children: DoubleColDivideLongColumn(col 57:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 62:double) -> 57:double, IfExprNullCondExpr(col 59:boolean, null, col 63:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 59:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 63:bigint) -> 64:bigint) -> 62:double, DoubleColMultiplyDoubleColumn(col 57:double, col 65:double)(children: DoubleColMultiplyDoubleColumn(col 65:double, col 66:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 57:double)(children: FuncPowerDoubleToDouble(col 65:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 65:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 65:double) -> 57:double) -> 65:double) -> 57:double) -> 65:double, DoubleColUnaryMinus(col 57:double)(children: FuncPowerDoubleToDouble(col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double) -> 57:double, CastLongToDouble(col 64:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 64:bigint) -> 65:double) -> 66:double) -> 57:double, DoubleColDivideLongColumn(col 62:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 65:double)(children: DoubleColDivideLongColumn(col 62:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 62:double) -> 65:double) -> 62:double) -> 65:double) -> 62:double, LongColUnaryMinus(col 64:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 64:bigint) -> 67:bigint, DoubleColModuloDoubleScalar(col 57:double, val -26.28)(children: DoubleColAddDoubleColumn(col 65:double, col 70:double)(children: DoubleColDivideLongColumn(col 57:double, col 69:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 65:double)(children: DoubleColDivideLongColumn(col 57:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 
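Note: in the selectExpressions above, the n = 1 guard on the sample-variance denominator vectorizes as IfExprNullCondExpr, and because scratch columns are allocated per reference, the same subtree is re-emitted with fresh output columns at every use. The recurring pattern reads as follows (column numbers vary per occurrence):

    -- CASE WHEN (n = 1) THEN (null) ELSE (n - 1) END becomes:
    --   IfExprNullCondExpr(col B:boolean, null, col C:bigint)
    --     (children: LongColEqualLongScalar(col N:bigint, val 1) -> B:boolean,
    --                LongColSubtractLongScalar(col N:bigint, val 1) -> C:bigint) -> D:bigint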
65:double) -> 57:double, IfExprNullCondExpr(col 64:boolean, null, col 68:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 64:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 68:bigint) -> 69:bigint) -> 65:double, DoubleColMultiplyDoubleColumn(col 57:double, col 66:double)(children: DoubleColMultiplyDoubleColumn(col 66:double, col 70:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 57:double)(children: FuncPowerDoubleToDouble(col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 66:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double) -> 57:double) -> 66:double, DoubleColUnaryMinus(col 57:double)(children: FuncPowerDoubleToDouble(col 70:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 70:double)(children: DoubleColDivideLongColumn(col 57:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 57:double) -> 70:double) -> 57:double) -> 70:double) -> 57:double) -> 70:double) -> 57:double, CastLongToDouble(col 69:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 69:bigint) -> 66:double) -> 70:double) -> 57:double) -> 65:double Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) sort order: +++++++++++++++++++++++++++++++++++++++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -3031,26 +3163,27 @@ STAGE PLANS: predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean) Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: 
bigint), cfloat (type: float), cdouble (type: double), cboolean1 (type: boolean) - outputColumnNames: ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cboolean1 + expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 10] + projectedOutputColumnNums: [10, 4, 3, 2, 5, 0, 1, 13, 17, 15, 19, 16, 21, 18, 23] + selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 2:int) -> 15:double, CastLongToDouble(col 2:int) -> 16:double) -> 17:double, CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 18:double) -> 19:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 18:double, col 20:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 20:double) -> 21:double, CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 20:double, col 22:double)(children: CastLongToDouble(col 1:smallint) -> 20:double, CastLongToDouble(col 1:smallint) -> 22:double) -> 23:double Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint) + aggregations: max(_col1), sum(_col2), sum(_col8), sum(_col7), count(_col3), sum(_col4), count(_col4), min(_col2), sum(_col10), sum(_col9), count(_col2), sum(_col3), sum(_col12), sum(_col11), count(_col5), sum(_col14), sum(_col13), count(_col6) Group By Vectorization: - aggregators: VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_pop, VectorUDAFAvgLong(col 2:int) -> struct + aggregators: VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, 
VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 21:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] - keys: cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] + keys: _col0 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) @@ -3061,7 +3194,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct), _col4 (type: struct), _col5 (type: bigint), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: bigint), _col8 (type: bigint), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: bigint), _col13 (type: double), _col14 (type: double), _col15 (type: bigint), _col16 (type: double), _col17 (type: double), _col18 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -3073,25 +3206,44 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "var_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceSampleEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), var_samp(VALUE._col2), avg(VALUE._col3), min(VALUE._col4), var_pop(VALUE._col5), sum(VALUE._col6), stddev_samp(VALUE._col7), stddev_pop(VALUE._col8), avg(VALUE._col9) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), sum(VALUE._col9), count(VALUE._col10), sum(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13), count(VALUE._col14), sum(VALUE._col15), sum(VALUE._col16), count(VALUE._col17) + Group By Vectorization: + aggregators: VectorUDAFMaxDouble(col 1:float) -> float, VectorUDAFSumLong(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, 
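Note: the map/reduce pair above is the other half of the rewrite. The map-side Select projects each aggregated column together with its square (the CastLongToDouble and DoubleColMultiplyDoubleColumn scratch expressions), the hash-mode Group By collects only sum/count partials, and the reducer merges them with native VectorUDAFSumDouble / VectorUDAFCountMerge aggregators. That is what removes the old notVectorizedReason (FINAL-mode var_samp over a STRUCT intermediate was unsupported) and lets Reducer 2 run vectorized. A sketch of the equivalent hand-written two-stage query (shape illustrative; column and table names taken from the plan):

    -- illustrative decomposition of var_pop(cint) over alltypesorc
    SELECT (q - (s * s) / n) / n AS var_pop_cint
    FROM (
      SELECT sum(CAST(cint AS double) * CAST(cint AS double)) AS q,
             sum(CAST(cint AS double)) AS s,
             count(cint) AS n
      FROM alltypesorc
    ) partials;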
VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCountMerge(col 5:bigint) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMinLong(col 8:bigint) -> bigint, VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFCountMerge(col 11:bigint) -> bigint, VectorUDAFSumLong(col 12:bigint) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCountMerge(col 15:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFCountMerge(col 18:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:boolean + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: KEY._col0 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28D / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553D / _col6) (type: double), (_col3 % (79.553D / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDouble(_col7)) (type: double) + expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28D / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), ((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END) % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), (_col6 / _col7) (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + ((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END)) (type: double), _col8 (type: bigint), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (- (10.175 + (- _col1))) (type: float), (79.553D / ((_col9 - ((_col10 * _col10) / _col11)) / _col11)) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / CASE WHEN ((_col5 = 1L)) THEN (null) ELSE ((_col5 - 1)) END) % (79.553D / ((_col9 - ((_col10 * _col10) / _col11)) / _col11))) (type: double), _col12 (type: bigint), power(((_col13 - ((_col14 * _col14) / _col15)) / CASE WHEN ((_col15 = 1L)) THEN 
(null) ELSE ((_col15 - 1)) END), 0.5) (type: double), (-1.389 * CAST( _col8 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col12 AS decimal(19,0)) - (-1.389 * CAST( _col8 AS decimal(19,0)))) (type: decimal(25,3)), power(((_col16 - ((_col17 * _col17) / _col18)) / _col18), 0.5) (type: double), (- (CAST( _col12 AS decimal(19,0)) - (-1.389 * CAST( _col8 AS decimal(19,0))))) (type: decimal(25,3)), (UDFToDouble(_col12) / _col5) (type: double), (- (UDFToDouble(_col12) / _col5)) (type: double), ((UDFToDouble(_col12) / _col5) * UDFToDouble(_col12)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 19, 20, 2, 22, 24, 23, 31, 28, 34, 8, 35, 33, 38, 43, 12, 39, 46, 49, 40, 52, 53, 50, 56] + selectExpressions: DoubleColUnaryMinus(col 1:float) -> 19:float, DoubleScalarDivideDoubleColumn(val -26.28, col 1:double)(children: col 1:float) -> 20:double, DecimalColSubtractDecimalScalar(col 21:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 21:decimal(19,0)) -> 22:decimal(23,3), DoubleColDivideLongColumn(col 23:double, col 27:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 24:double)(children: DoubleColDivideLongColumn(col 23:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 23:double) -> 24:double) -> 23:double, IfExprNullCondExpr(col 25:boolean, null, col 26:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 25:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 26:bigint) -> 27:bigint) -> 24:double, DoubleColModuloDoubleColumn(col 28:double, col 1:double)(children: DoubleColDivideLongColumn(col 23:double, col 30:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 28:double)(children: DoubleColDivideLongColumn(col 23:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 23:double) -> 28:double) -> 23:double, IfExprNullCondExpr(col 27:boolean, null, col 29:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 27:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 29:bigint) -> 30:bigint) -> 28:double, col 1:float) -> 23:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 28:float)(children: DoubleColUnaryMinus(col 1:float) -> 28:float) -> 31:float, DoubleColDivideLongColumn(col 6:double, col 7:bigint) -> 28:double, DoubleColAddDoubleColumn(col 33:double, col 35:double)(children: CastDecimalToDouble(col 32:decimal(23,3))(children: DecimalColSubtractDecimalScalar(col 21:decimal(19,0), val 10.175)(children: CastLongToDecimal(col 2:bigint) -> 21:decimal(19,0)) -> 32:decimal(23,3)) -> 33:double, DoubleColDivideLongColumn(col 34:double, col 37:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 35:double)(children: DoubleColDivideLongColumn(col 34:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 34:double) -> 35:double) -> 34:double, IfExprNullCondExpr(col 30:boolean, null, col 36:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 36:bigint) -> 37:bigint) -> 35:double) -> 34:double, DoubleColDivideLongColumn(col 33:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 35:double)(children: 
DoubleColDivideLongColumn(col 33:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 33:double) -> 35:double) -> 33:double) -> 35:double, DoubleColUnaryMinus(col 38:float)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 33:float)(children: DoubleColUnaryMinus(col 1:float) -> 33:float) -> 38:float) -> 33:float, DoubleScalarDivideDoubleColumn(val 79.553, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 38:double) -> 39:double) -> 38:double) -> 39:double) -> 38:double, DoubleColModuloDoubleColumn(col 40:double, col 39:double)(children: DoubleColDivideLongColumn(col 39:double, col 42:bigint)(children: DoubleColSubtractDoubleColumn(col 3:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 5:bigint)(children: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 39:double) -> 40:double) -> 39:double, IfExprNullCondExpr(col 37:boolean, null, col 41:bigint)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 37:boolean, LongColSubtractLongScalar(col 5:bigint, val 1) -> 41:bigint) -> 42:bigint) -> 40:double, DoubleScalarDivideDoubleColumn(val 79.553, col 43:double)(children: DoubleColDivideLongColumn(col 39:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 43:double)(children: DoubleColDivideLongColumn(col 39:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 39:double) -> 43:double) -> 39:double) -> 43:double) -> 39:double) -> 43:double, FuncPowerDoubleToDouble(col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 13:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 15:bigint)(children: DoubleColMultiplyDoubleColumn(col 14:double, col 14:double) -> 39:double) -> 40:double) -> 39:double, IfExprNullCondExpr(col 42:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 15:bigint, val 1) -> 42:boolean, LongColSubtractLongScalar(col 15:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 40:double) -> 39:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 21:decimal(19,0))(children: CastLongToDecimal(col 8:bigint) -> 21:decimal(19,0)) -> 46:decimal(24,3), DecimalColSubtractDecimalColumn(col 21:decimal(19,0), col 48:decimal(24,3))(children: CastLongToDecimal(col 12:bigint) -> 21:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 47:decimal(19,0))(children: CastLongToDecimal(col 8:bigint) -> 47:decimal(19,0)) -> 48:decimal(24,3)) -> 49:decimal(25,3), FuncPowerDoubleToDouble(col 50:double)(children: DoubleColDivideLongColumn(col 40:double, col 18:bigint)(children: DoubleColSubtractDoubleColumn(col 16:double, col 50:double)(children: DoubleColDivideLongColumn(col 40:double, col 18:bigint)(children: DoubleColMultiplyDoubleColumn(col 17:double, col 17:double) -> 40:double) -> 50:double) -> 40:double) -> 50:double) -> 40:double, FuncNegateDecimalToDecimal(col 51:decimal(25,3))(children: DecimalColSubtractDecimalColumn(col 21:decimal(19,0), col 48:decimal(24,3))(children: CastLongToDecimal(col 12:bigint) -> 21:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 47:decimal(19,0))(children: CastLongToDecimal(col 8:bigint) -> 47:decimal(19,0)) -> 48:decimal(24,3)) -> 
51:decimal(25,3)) -> 52:decimal(25,3), DoubleColDivideLongColumn(col 50:double, col 5:bigint)(children: CastLongToDouble(col 12:bigint) -> 50:double) -> 53:double, DoubleColUnaryMinus(col 54:double)(children: DoubleColDivideLongColumn(col 50:double, col 5:bigint)(children: CastLongToDouble(col 12:bigint) -> 50:double) -> 54:double) -> 50:double, DoubleColMultiplyDoubleColumn(col 55:double, col 54:double)(children: DoubleColDivideLongColumn(col 54:double, col 5:bigint)(children: CastLongToDouble(col 12:bigint) -> 54:double) -> 55:double, CastLongToDouble(col 12:bigint) -> 54:double) -> 56:double Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double) Reducer 3 @@ -3212,8 +3364,8 @@ ORDER BY cboolean1 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -false 11.0 -11.0 -2.389090909090909 -17881597706 -17881597716.175 3.8953387713327066E17 6.0 -0.8249998 -2454.8879999999995 3.8953385925167296E17 -2145884705 1.66288903197104486E18 0.8249998 4.7840233756130287E-17 4.098424268084119E-17 0.8249998 -1051696618 28.692556844886422 2980633855.245 -4032330473.245 85.79562278396777 4032330473.245 -3983699.3106060605 3983699.3106060605 4.1896430920933255E15 -true 79.553 -79.553 -0.33034580136836733 -401322621137 -401322621147.175 7.9255373737244976E16 34.727455139160156 -69.378 4856.6352637899645 7.9254972414623824E16 -2130544867 2.30133924842409523E18 69.378 3.456813247089758E-17 2.0387240975807185E-18 69.378 2182477964777 34.654968050508266 2959326820.263 2179518637956.737 9461.197516216069 -2179518637956.737 4.592756659884259E8 -4.592756659884259E8 1.002359020778021E21 +false 11.0 -11.0 -2.389090909090909 -17881597706 -17881597716.175 3.8953387713327046E17 1.0 -0.8249998 -2454.8879999999995 3.8953385925167277E17 -2145884705 1.66288903197104486E18 0.8249998 4.7840233756130287E-17 3.8687857663039107E-17 0.8249998 -1051696618 28.692556844886425 2980633855.245 -4032330473.245 85.79562278396776 4032330473.245 -3983699.3106060605 3983699.3106060605 4.1896430920933255E15 +true 79.553 -79.553 -0.33034580136836733 -401322621137 -401322621147.175 7.9255373737242976E16 23.552490234375 -69.378 4856.6352637899645 7.9254972414621824E16 -2130544867 2.30133924842409984E18 69.378 3.456813247089751E-17 5.788274192367441E-19 69.378 2182477964777 34.65496805050828 2959326820.263 2179518637956.737 9461.197516216063 -2179518637956.737 4.592756659884259E8 
-4.592756659884259E8 1.002359020778021E21 PREHOOK: query: create table test_count(i int) stored as orc PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 93ab21e6d9..5104c80bdb 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -114,25 +114,25 @@ STAGE PLANS: selectExpressions: LongColAddLongColumn(col 2:int, col 2:int) -> 13:int Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2) Group By Vectorization: - aggregators: VectorUDAFCount(col 2:int) -> bigint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFAvgLong(col 13:int) -> struct + aggregators: VectorUDAFCount(col 2:int) -> bigint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFSumLong(col 13:int) -> bigint, VectorUDAFCount(col 13:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -155,27 +155,36 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct) -> double + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE 
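Note: the changed golden rows for the alltypesorc query above (the false/true result lines) are consistent with the rewrite rather than a behavior change. The decomposed formula evaluates sums and squares in a different order than the previous variance accumulator presumably did, so printed values drift in their low-order digits (3.8953387713327066E17 vs ...27046E17), and columns computed with a modulo, such as var_samp % max(cfloat), can shift visibly (6.0 vs 1.0) because taking a huge double modulo a small one amplifies last-digit differences:

    -- algebraically equal, but not bit-identical in IEEE doubles:
    --   var_samp(x)  vs  (sum(x * x) - sum(x) * sum(x) / count(x)) / (count(x) - 1)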
Column stats: NONE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), (_col3 / _col4) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: LongColDivideLongColumn(col 3:bigint, col 4:bigint) -> 5:double + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out index fa67ae7edd..417575ad02 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out @@ -125,14 +125,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(_col0), max(_col1), min(_col0), avg(_col2) + aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: vectorized Reduce Vectorization: @@ -143,26 +143,35 @@ STAGE PLANS: vectorized: true Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4) Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFAvgFinal(col 3:struct) -> double + aggregators: VectorUDAFCountMerge(col 0:bigint) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 2:int) -> int, VectorUDAFSumLong(col 3:bigint) -> 
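Note: the vectorized_mapjoin and vectorized_shufflejoin changes are the simplest instance of the same rewrite: avg(e) splits into sum(e) and count(e), the struct-typed intermediate disappears from the shuffle (the Data size estimate drops from 92 to 32), and a new final Select computes the quotient, LongColDivideLongColumn(col 3:bigint, col 4:bigint). The query shape, illustratively:

    -- avg over an expression becomes sum/count plus a final division
    -- (names a, b, t are hypothetical)
    SELECT count(a), max(b), min(a), sum(a + a) / count(a + a) AS avg_val
    FROM t;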
bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL - projectedOutputColumnNums: [0, 1, 2, 3] + projectedOutputColumnNums: [0, 1, 2, 3, 4] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: bigint) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), (_col3 / _col4) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) + projectedOutputColumnNums: [0, 1, 2, 5] + selectExpressions: LongColDivideLongColumn(col 3:bigint, col 4:bigint) -> 5:double + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double) Reducer 4 Execution mode: vectorized Reduce Vectorization: @@ -179,13 +188,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out index 2fccd289a4..68b89a726e 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_timestamp_funcs.q.out @@ -1050,33 +1050,34 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: ctimestamp1 (type: timestamp) - outputColumnNames: ctimestamp1 + expressions: ctimestamp1 (type: timestamp), UDFToDouble(ctimestamp1) (type: double), (UDFToDouble(ctimestamp1) * 
UDFToDouble(ctimestamp1)) (type: double) + outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 3, 6] + selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgTimestamp(col 0:timestamp) -> struct, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: variance, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_samp, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: std, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_samp + aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7] + projectedOutputColumnNums: [0, 1, 2, 3] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -1088,24 +1089,41 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + Execution mode: vectorized Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true - notVectorizedReason: GROUPBY operator: Vector aggregation : "variance" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFVarianceEvaluator - vectorized: false + allNative: false + usesVectorUDFAdaptor: 
true + vectorized: true Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + Group By Vectorization: + aggregators: VectorUDAFSumDouble(col 0:double) -> double, VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFSumDouble(col 3:double) -> double + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0, 1, 2, 3] mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), _col2 BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), _col3 BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) + expressions: round((_col0 / _col1), 0) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D (type: boolean), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5), 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5, 7, 8, 11, 6, 12, 13, 14] + selectExpressions: RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 0)(children: DoubleColDivideLongColumn(col 0:double, col 1:bigint) -> 4:double) -> 5:double, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 7:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 8:boolean, VectorUDFAdaptor(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D)(children: DoubleColDivideLongColumn(col 4:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double, IfExprNullCondExpr(col 9:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 9:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 6:double) -> 11:boolean, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 6:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double) -> 4:double) -> 6:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 12:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double) -> 4:double) -> 12:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 13:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double) -> 4:double) -> 13:double, RoundWithNumDigitsDoubleToDouble(col 4, decimalPlaces 3)(children: FuncPowerDoubleToDouble(col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 17:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 14:double)(children: DoubleColDivideLongColumn(col 4:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 4:double) -> 14:double) -> 4:double, IfExprNullCondExpr(col 15:boolean, null, col 16:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 15:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 16:bigint) -> 17:bigint) -> 14:double) -> 4:double) -> 14:double + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out index b8a1f9057c..5cd757a019 100644 --- 
a/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorization_limit.q.out @@ -259,19 +259,19 @@ STAGE PLANS: selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double Statistics: Num rows: 12288 Data size: 110096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col1) + aggregations: sum(_col1), count(_col1) Group By Vectorization: - aggregators: VectorUDAFAvgDouble(col 13:double) -> struct + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 0:tinyint native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] keys: _col0 (type: tinyint) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + @@ -282,10 +282,10 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true partitionColumnNums: [0] - valueColumnNums: [1] - Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE + valueColumnNums: [1, 2] + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.3 - value expressions: _col1 (type: struct) + value expressions: _col1 (type: double), _col2 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -313,41 +313,50 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY._col0:tinyint, VALUE._col0:struct + dataColumnCount: 3 + dataColumns: KEY._col0:tinyint, VALUE._col0:double, VALUE._col1:bigint partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) Group By Vectorization: - aggregators: VectorUDAFAvgFinal(col 1:struct) -> double + aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFCountMerge(col 2:bigint) -> bigint className: VectorGroupByOperator groupByMode: MERGEPARTIAL keyExpressions: col 0:tinyint native: false vectorProcessingMode: MERGE_PARTIAL - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] keys: KEY._col0 (type: tinyint) mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 20 - Limit Vectorization: - className: VectorLimitOperator + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 128 Data size: 2436 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false + 
projectedOutputColumnNums: [0, 3] + selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double + Statistics: Num rows: 128 Data size: 1412 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 20 + Limit Vectorization: + className: VectorLimitOperator + native: true Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 20 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out index af058a44e4..e062db21fc 100644 --- a/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out +++ b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out @@ -379,34 +379,38 @@ STAGE PLANS: alias: t Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: px (type: int), y (type: decimal(10,0)), x (type: decimal(10,0)) - outputColumnNames: px, y, x + expressions: px (type: int), x (type: decimal(10,0)), y (type: decimal(10,0)), UDFToDouble(x) (type: double), (UDFToDouble(x) * UDFToDouble(x)) (type: double), UDFToDouble(y) (type: double), (UDFToDouble(y) * UDFToDouble(y)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_pop(x), var_pop(y), corr(y, x), covar_samp(y, x), covar_pop(y, x), regr_count(y, x), regr_slope(y, x), regr_intercept(y, x), regr_r2(y, x), regr_sxx(y, x), regr_syy(y, x), regr_sxy(y, x), regr_avgx(y, x), regr_avgy(y, x) - keys: px (type: int) + aggregations: sum(_col4), sum(_col3), count(_col1), sum(_col6), sum(_col5), count(_col2), corr(_col2, _col1), covar_samp(_col2, _col1), covar_pop(_col2, _col1), regr_count(_col2, _col1), regr_slope(_col2, _col1), regr_intercept(_col2, _col1), regr_r2(_col2, _col1), regr_sxx(_col2, _col1), regr_syy(_col2, _col1), regr_sxy(_col2, _col1), regr_avgx(_col2, _col1), regr_avgy(_col2, _col1) + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 
(type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: bigint), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct) Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), var_pop(VALUE._col1), corr(VALUE._col2), covar_samp(VALUE._col3), covar_pop(VALUE._col4), regr_count(VALUE._col5), regr_slope(VALUE._col6), regr_intercept(VALUE._col7), regr_r2(VALUE._col8), regr_sxx(VALUE._col9), regr_syy(VALUE._col10), regr_sxy(VALUE._col11), regr_avgx(VALUE._col12), regr_avgy(VALUE._col13) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), corr(VALUE._col6), covar_samp(VALUE._col7), covar_pop(VALUE._col8), regr_count(VALUE._col9), regr_slope(VALUE._col10), regr_intercept(VALUE._col11), regr_r2(VALUE._col12), regr_sxx(VALUE._col13), regr_syy(VALUE._col14), regr_sxy(VALUE._col15), regr_avgx(VALUE._col16), regr_avgy(VALUE._col17) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18 Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: int), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), ((_col4 - ((_col5 * _col5) / _col6)) / _col6) (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: bigint), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: decimal(14,4)), _col18 (type: decimal(14,4)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce diff --git a/ql/src/test/results/clientpositive/udaf_binarysetfunctions_no_cbo.q.out b/ql/src/test/results/clientpositive/udaf_binarysetfunctions_no_cbo.q.out new file mode 100644 index 0000000000..6185693ade --- /dev/null +++ b/ql/src/test/results/clientpositive/udaf_binarysetfunctions_no_cbo.q.out @@ -0,0 +1,522 @@ +PREHOOK: query: drop table t +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table t +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table t (id int,px int,y decimal,x decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t 
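
The sum/count plan shapes above all come from the same algebraic identity. A minimal HiveQL sketch of the var_pop case (table t and column x are illustrative stand-ins, not part of the patch; the rule operates on the Calcite plan, not on SQL text):

    -- original query
    SELECT var_pop(x) FROM t;
    -- equivalent form produced by HiveAggregateReduceFunctionsRule:
    SELECT (sum(x * x) - (sum(x) * sum(x)) / count(x)) / count(x) FROM t;
    -- stddev_pop(x) additionally wraps this quotient in power(..., 0.5),
    -- matching the FuncPowerDoubleToDouble(...) expressions in the plans above
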
+POSTHOOK: query: create table t (id int,px int,y decimal,x decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values (101,1,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (101,1,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (201,2,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (201,2,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (301,3,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (301,3,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (401,4,1,11) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (401,4,1,11) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (501,5,1,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (501,5,1,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (601,6,null,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (601,6,null,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (701,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (701,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (102,1,2,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (102,1,2,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT 
[] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (202,2,1,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (202,2,1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (302,3,2,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (302,3,2,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (402,4,2,12) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (402,4,2,12) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (502,5,2,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (502,5,2,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (602,6,null,2) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (602,6,null,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (702,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (702,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (103,1,3,3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (103,1,3,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (203,2,1,3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (203,2,1,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: 
Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (303,3,3,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (303,3,3,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (403,4,3,13) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (403,4,3,13) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (503,5,3,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (503,5,3,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (603,6,null,3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (603,6,null,3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (703,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (703,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (104,1,4,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (104,1,4,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (204,2,1,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (204,2,1,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (304,3,4,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (304,3,4,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] 
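
The sample variants divide by count minus one instead, guarded so that a single-row group yields NULL rather than a division by zero; this is the CASE WHEN ((_col1 = 1L)) THEN (null) expression in the reduced plans above. A sketch under the same illustrative naming:

    -- var_samp(x), as reconstructed by the rule:
    SELECT (sum(x * x) - (sum(x) * sum(x)) / count(x))
           / CASE WHEN count(x) = 1 THEN NULL ELSE count(x) - 1 END
    FROM t;

Note that this udaf_binarysetfunctions_no_cbo.q.out golden file keeps the original var_pop/corr/regr_* aggregates in the plan: the reduction is a Calcite rule, so it does not fire when CBO is disabled.
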
+PREHOOK: query: insert into t values (404,4,4,14) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (404,4,4,14) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (504,5,4,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (504,5,4,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (604,6,null,4) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (604,6,null,4) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (704,6,null,null) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (704,6,null,null) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: insert into t values (800,7,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values (800,7,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.id SCRIPT [] +POSTHOOK: Lineage: t.px SCRIPT [] +POSTHOOK: Lineage: t.x SCRIPT [] +POSTHOOK: Lineage: t.y SCRIPT [] +PREHOOK: query: explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x), +regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x) + from t group by px order by px +PREHOOK: type: QUERY +POSTHOOK: query: explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x), +regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x) + from t group by px order by px +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: px (type: int), y (type: decimal(10,0)), x (type: decimal(10,0)) + outputColumnNames: px, y, x + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: var_pop(x), var_pop(y), corr(y, x), covar_samp(y, x), covar_pop(y, x), regr_count(y, x), regr_slope(y, x), regr_intercept(y, x), regr_r2(y, x), regr_sxx(y, x), regr_syy(y, x), 
regr_sxy(y, x), regr_avgx(y, x), regr_avgy(y, x) + keys: px (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: var_pop(VALUE._col0), var_pop(VALUE._col1), corr(VALUE._col2), covar_samp(VALUE._col3), covar_pop(VALUE._col4), regr_count(VALUE._col5), regr_slope(VALUE._col6), regr_intercept(VALUE._col7), regr_r2(VALUE._col8), regr_sxx(VALUE._col9), regr_syy(VALUE._col10), regr_sxy(VALUE._col11), regr_avgx(VALUE._col12), regr_avgy(VALUE._col13) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(14,4)), _col14 (type: decimal(14,4)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: decimal(14,4)), VALUE._col13 (type: decimal(14,4)), VALUE._col5 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select px, + round( var_pop(x),5), + round( 
var_pop(y),5), + round( corr(y,x),5), + round( covar_samp(y,x),5), + round( covar_pop(y,x),5), + regr_count(y,x), + round( regr_slope(y,x),5), + round( regr_intercept(y,x),5), + round( regr_r2(y,x),5), + round( regr_sxx(y,x),5), + round( regr_syy(y,x),5), + round( regr_sxy(y,x),5), + round( regr_avgx(y,x),5), + round( regr_avgy(y,x),5), + round( regr_count(y,x),5) + from t group by px order by px +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select px, + round( var_pop(x),5), + round( var_pop(y),5), + round( corr(y,x),5), + round( covar_samp(y,x),5), + round( covar_pop(y,x),5), + regr_count(y,x), + round( regr_slope(y,x),5), + round( regr_intercept(y,x),5), + round( regr_r2(y,x),5), + round( regr_sxx(y,x),5), + round( regr_syy(y,x),5), + round( regr_sxy(y,x),5), + round( regr_avgx(y,x),5), + round( regr_avgy(y,x),5), + round( regr_count(y,x),5) + from t group by px order by px +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +1 1.25 1.25 1.0 1.66667 1.25 4 1.0 0.0 1.0 5.0 5.0 5.0 2.50000 2.50000 4 +2 1.25 0.0 NULL 0.0 0.0 4 0.0 1.0 1.0 5.0 0.0 0.0 2.50000 1.00000 4 +3 0.0 1.25 NULL 0.0 0.0 4 NULL NULL NULL 0.0 5.0 0.0 1.00000 2.50000 4 +4 1.25 1.25 1.0 1.66667 1.25 4 1.0 -10.0 1.0 5.0 5.0 5.0 12.50000 2.50000 4 +5 NULL 1.25 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL 0 +6 1.25 NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL 0 +7 0.0 0.0 NULL NULL 0.0 1 NULL NULL NULL 0.0 0.0 0.0 1.00000 1.00000 1 +PREHOOK: query: select id,regr_count(y,x) over (partition by px) from t order by id +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select id,regr_count(y,x) over (partition by px) from t order by id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +101 4 +102 4 +103 4 +104 4 +201 4 +202 4 +203 4 +204 4 +301 4 +302 4 +303 4 +304 4 +401 4 +402 4 +403 4 +404 4 +501 0 +502 0 +503 0 +504 0 +601 0 +602 0 +603 0 +604 0 +701 0 +702 0 +703 0 +704 0 +800 1 diff --git a/ql/src/test/results/clientpositive/udaf_number_format.q.out b/ql/src/test/results/clientpositive/udaf_number_format.q.out index 14ab23dc0e..3ae86d29d6 100644 --- a/ql/src/test/results/clientpositive/udaf_number_format.q.out +++ b/ql/src/test/results/clientpositive/udaf_number_format.q.out @@ -26,27 +26,31 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum('a'), avg('a'), variance('a'), std('a') + aggregations: sum('a'), count(), sum(null), sum(null) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double) Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), avg(VALUE._col1), variance(VALUE._col2), std(VALUE._col3) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) mode: mergepartial 
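
avg is the simplest reduction: it splits into a sum and a count, and a final Select computes the quotient, the (_col0 / _col1) expressions seen in these plans. When the query also asks for sum and count directly (as in udf8.q.out below), all three results are derived from the single sum/count pair instead of keeping a separate struct-typed avg aggregate. Illustrative sketch:

    SELECT avg(x) FROM t;
    -- is reduced to:
    SELECT sum(x) / count(x) FROM t;
    -- for a non-null literal argument the planner can count all rows instead,
    -- as in the sum('a'), count() pair above
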
outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: double), (_col0 / _col1) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -72,4 +76,4 @@ FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -0.0 NULL NULL NULL +0.0 0.0 NULL NULL diff --git a/ql/src/test/results/clientpositive/udf3.q.out b/ql/src/test/results/clientpositive/udf3.q.out index 9f9f56fe02..5069e43053 100644 --- a/ql/src/test/results/clientpositive/udf3.q.out +++ b/ql/src/test/results/clientpositive/udf3.q.out @@ -29,22 +29,22 @@ STAGE PLANS: Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count(null), sum(null), avg(null), min(null), max(null) + aggregations: count(null), sum(null), min(null), max(null) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct), _col3 (type: int), _col4 (type: int) + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: int), _col3 (type: int) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), min(VALUE._col3), max(VALUE._col4) + aggregations: count(VALUE._col0), sum(VALUE._col1), min(VALUE._col2), max(VALUE._col3) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToString(_col0) (type: string), UDFToString(_col1) (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string), UDFToString(_col4) (type: string) + expressions: UDFToString(_col0) (type: string), UDFToString(_col1) (type: string), UDFToString((UDFToDouble(_col1) / _col0)) (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 920 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/udf8.q.out b/ql/src/test/results/clientpositive/udf8.q.out index d75cd78217..fbb113ca76 100644 --- a/ql/src/test/results/clientpositive/udf8.q.out +++ b/ql/src/test/results/clientpositive/udf8.q.out @@ -46,27 +46,31 @@ STAGE PLANS: outputColumnNames: c1 Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(c1), sum(c1), count(c1) + aggregations: sum(c1), count(c1) mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: bigint) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), count(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: double), _col0 (type: double), _col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out index 096d98db3a..7e2b4501ed 100644 --- a/ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ b/ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -322,39 +322,43 @@ STAGE PLANS: Union Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) TableScan 
Union Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: avg(_col0) + aggregations: sum(_col0), count(_col0) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct) + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (_col0 / _col1) (type: double) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Map Reduce diff --git a/ql/src/test/results/clientpositive/vector_aggregate_9.q.out b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 6cf99bb0b8..0f7fcc16c2 100644 --- a/ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -134,9 +134,9 @@ STAGE PLANS: projectedOutputColumnNums: [6] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(dc), max(dc), sum(dc), avg(dc) + aggregations: min(dc), max(dc), sum(dc), count(dc) Group By Vectorization: - aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimal(col 6:decimal(38,18)) -> struct + aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 6:decimal(38,18)) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -144,7 +144,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -152,8 +152,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -176,17 +176,21 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), (_col2 / _col3) (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -236,9 +240,9 @@ STAGE PLANS: projectedOutputColumnNums: [5] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(d), max(d), sum(d), avg(d) + aggregations: min(d), max(d), sum(d), count(d) Group By Vectorization: - aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -246,7 +250,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -254,8 +258,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true 
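
The same sum/count split applies across input types. For timestamps (the next hunk below) the partial sum is already a double, produced through the timestamp's double representation, so the final quotient is a plain double division. A conceptual sketch, with t and ts as stand-in names:

    SELECT avg(ts) FROM t;
    -- is reduced, conceptually, to:
    SELECT sum(cast(ts AS double)) / count(ts) FROM t;
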
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -278,17 +282,21 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -338,9 +346,9 @@ STAGE PLANS: projectedOutputColumnNums: [10] Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: min(ts), max(ts), sum(ts), avg(ts) + aggregations: min(ts), max(ts), sum(ts), count(ts) Group By Vectorization: - aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFAvgTimestamp(col 10:timestamp) -> struct + aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFCount(col 10:timestamp) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -348,7 +356,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -356,8 +364,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 200 
Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -380,17 +388,21 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3) + aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), (_col2 / _col3) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/vector_cast_constant.q.out index 4b06016702..3d3d761932 100644 --- a/ql/src/test/results/clientpositive/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/vector_cast_constant.q.out @@ -136,18 +136,18 @@ STAGE PLANS: projectedOutputColumnNums: [2] Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(50), avg(50.0D), avg(50) + aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50) Group By Vectorization: - aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 12:int) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 14:decimal(10,0)) -> struct + aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 2:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -160,7 +160,7 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -177,17 +177,21 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Select Operator + expressions: _col0 (type: int), (_col1 / _col2) (type: double), (_col3 / _col4) (type: double), CAST( (_col5 / _col6) AS decimal(6,4)) (type: decimal(6,4)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Map Reduce @@ -205,7 +209,7 @@ STAGE PLANS: nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 - value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(6,4)) Execution mode: vectorized Map Vectorization: enabled: true @@ -222,7 +226,7 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4)) + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(6,4)) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE Limit diff --git a/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out index c17951a738..16c80f0289 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_aggregate.q.out @@ -208,26 +208,27 @@ 
STAGE PLANS: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct] Select Operator - expressions: cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), cint (type: int) - outputColumnNames: cdecimal1, cdecimal2, cint + expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 2, 3] + projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10] + selectExpressions: CastDecimalToDouble(col 1:decimal(20,10)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(20,10)) -> 6:double, CastDecimalToDouble(col 1:decimal(20,10)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(23,14)) -> 7:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 9:double) -> 10:double Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFAvgDecimal(col 1:decimal(20,10)) -> struct, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFAvgDecimal(col 2:decimal(23,14)) -> struct, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> 
double, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 3:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - keys: cint (type: int) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -239,7 +240,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -255,23 +256,23 @@ STAGE PLANS: includeColumns: [1, 2, 3] dataColumns: cdouble:double, cdecimal1:decimal(20,10), cdecimal2:decimal(23,14), cint:int partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 6144 Data size: 1082441 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col15 > 1L) (type: boolean) + predicate: 
(_col13 > 1L) (type: boolean) Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double) + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), (CAST( _col4 AS decimal(24,14)) / _col1) (type: decimal(38,28)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), (CAST( _col10 AS decimal(27,18)) / _col7) (type: decimal(38,29)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2048 Data size: 360813 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -306,14 +307,14 @@ POSTHOOK: query: SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby #### A masked pattern was here #### --3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.811207307641183333 2604201.2704476737 2852759.5602156054 --563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.63641486490000 1426.0153418918999 2016.6902366556308 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.503273076922950000 1707.9424961538462 2415.395441814127 -253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274 -528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984 -626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603 -6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653 -762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.74432689170000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.078394999846250000 3491310.1327026924 4937458.140118758 -NULL 3072 9318.4351351351 -4298.1513513514 
5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565159 6822.606289190924 +-3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.6735247296333333333333333333 2174330.209240386 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.81120730764118333333333333333 2604201.2704476737 2852759.5602156054 +-563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.6364148649000000000000000000 1426.0153418918997 2016.6902366556305 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.50327307692295000000000000000 1707.9424961538462 2415.395441814127 +253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.3399236697630859375000000000 5708.956347886203 5711.745967572781 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.42835967548079188476562500000 6837.632716002931 6840.973851172272 +528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.4109968243230468750000000000 257528.9298820665 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.96562480769168948242187500000 308443.1074570797 308593.82484083937 +626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.2939966110631835937500000000 5742.091453237337 5744.897264034264 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.32914804687497798828125000000 6877.318722794881 6880.679250101608 +6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.6757008106666666666666666667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.69651461528206666666666666667 3292794.4113115156 4032833.0678006653 +762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917000000000000000000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.07839499984625000000000000000 3491310.1327026924 4937458.140118757 +NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.6081081080666666666666666667 5695.483082135323 5696.410307714464 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.57692307692296666666666666667 6821.495748565151 6822.606289190915 PREHOOK: query: CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(11,5)) AS cdecimal1, CAST (((cdouble*9.3)/13) AS DECIMAL(16,0)) AS cdecimal2, @@ -543,26 +544,27 @@ STAGE PLANS: native: true vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5)/DECIMAL_64, 2:cdecimal2:decimal(16,0)/DECIMAL_64, 3:cint:int, 4:ROW__ID:struct] Select Operator - expressions: cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), cint (type: int) - outputColumnNames: cdecimal1, cdecimal2, cint + expressions: cint (type: int), cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [1, 2, 3] + projectedOutputColumnNums: [3, 1, 2, 6, 9, 7, 12] + selectExpressions: 
CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 8:double)(children: CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 7:double, CastDecimalToDouble(col 5:decimal(11,5))(children: ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> 8:double) -> 9:double, CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 7:double, DoubleColMultiplyDoubleColumn(col 8:double, col 11:double)(children: CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 8:double, CastDecimalToDouble(col 10:decimal(16,0))(children: ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 10:decimal(16,0)) -> 11:double) -> 12:double Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count() + aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count() Group By Vectorization: - aggregators: VectorUDAFCount(col 1:decimal(11,5)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> decimal(21,5), VectorUDAFAvgDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> struct, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 5:decimal(11,5)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> 6:decimal(11,5)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(16,0)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> decimal(26,0), VectorUDAFAvgDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> struct, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 7:decimal(16,0)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(ConvertDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> 8:decimal(16,0)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint + aggregators: VectorUDAFCount(col 1:decimal(11,5)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFMinDecimal64(col 1:decimal(11,5)/DECIMAL_64) -> decimal(11,5)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 1:decimal(11,5)/DECIMAL_64) -> decimal(21,5), VectorUDAFSumDouble(col 9:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCount(col 2:decimal(16,0)/DECIMAL_64) -> bigint, VectorUDAFMaxDecimal64(col 2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFMinDecimal64(col 
2:decimal(16,0)/DECIMAL_64) -> decimal(16,0)/DECIMAL_64, VectorUDAFSumDecimal64ToDecimal(col 2:decimal(16,0)/DECIMAL_64) -> decimal(26,0), VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 7:double) -> double, VectorUDAFCountStar(*) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 3:int native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] - keys: cint (type: int) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) @@ -574,7 +576,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) + value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -590,23 +592,23 @@ STAGE PLANS: includeColumns: [1, 2, 3] dataColumns: cdouble:double, cdecimal1:decimal(11,5)/DECIMAL_64, cdecimal2:decimal(16,0)/DECIMAL_64, cint:int partitionColumnCount: 0 - scratchColumnTypeNames: [decimal(11,5), decimal(11,5), decimal(16,0), decimal(16,0)] + scratchColumnTypeNames: [decimal(11,5), double, double, double, double, decimal(16,0), double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14) + aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12) keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, 
_col12, _col13, _col14, _col15 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 Statistics: Num rows: 6144 Data size: 173221 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col15 > 1L) (type: boolean) + predicate: (_col13 > 1L) (type: boolean) Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double) + expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), (CAST( _col4 AS decimal(15,9)) / _col1) (type: decimal(35,29)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), CAST( (CAST( _col10 AS decimal(20,4)) / _col7) AS decimal(20,4)) (type: decimal(20,4)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2048 Data size: 57740 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -641,14 +643,14 @@ POSTHOOK: query: SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby_small #### A masked pattern was here #### --3728 5 -515.62107 -3367.65176 -13986.22811 -2797.245622000 1140.812276 1275.466899351126 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621 --563 2 -515.62107 -3367.65176 -3883.27283 -1941.636415000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596 -253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.339923750 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613 -528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.351935137 5555.7621107931345 5558.482190324908 1024 6984454 -11710 13948890 13621.9629 308443.09823296947 308593.8156122219 -626923679 1024 9723.40270 -9778.95135 10541.05247 10.293996553 5742.091453325366 5744.897264122336 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185 -6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 -762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 -NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 +-3728 5 -515.62107 -3367.65176 -13986.22811 -2797.24562200000000000000000000000 1140.8122759999992 1275.466899351125 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621 +-563 2 -515.62107 -3367.65176 
-3883.27283 -1941.63641500000000000000000000000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596 +253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.33992375000000000000000000000 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613 +528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.35193513698630136986301369863 5555.762110793133 5558.482190324906 1024 6984454 -11710 13948890 13621.9629 308443.0982329696 308593.815612222 +626923679 1024 9723.40270 -9778.95135 10541.05247 10.29399655273437500000000000000 5742.091453325365 5744.897264122335 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185 +6981 2 -515.62107 -515.62107 -1031.24214 -515.62107000000000000000000000000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175 +762 1 1531.21941 1531.21941 1531.21941 1531.21941000000000000000000000000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881 +NULL 3072 9318.43514 -4298.15135 5018444.11392 NULL 5695.4830839098695 5696.410309489299 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734 PREHOOK: query: SELECT SUM(HASH(*)) FROM (SELECT cint, COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1), @@ -667,4 +669,4 @@ FROM (SELECT cint, POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_vgby_small #### A masked pattern was here #### -91757235680 +96673467876 diff --git a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out index 6e41f536de..fd6d9c3b9f 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_precision.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_precision.q.out @@ -580,9 +580,9 @@ STAGE PLANS: projectedOutputColumnNums: [0] Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(dec), sum(dec) + aggregations: sum(dec), count(dec) Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10) + aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -590,7 +590,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -598,8 +598,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -622,17 +622,21 @@ STAGE PLANS: enableConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -648,7 +652,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_precision #### A masked pattern was here #### -88499534.57586576220645 2743485571.8518386284 +88499534.575865762206451613 2743485571.8518386284 PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@decimal_precision @@ -1161,9 +1165,9 @@ STAGE PLANS: projectedOutputColumnNums: [0] Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(dec), sum(dec) + aggregations: sum(dec), count(dec) Group By Vectorization: - aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10) + aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false @@ -1171,7 +1175,7 @@ STAGE PLANS: projectedOutputColumnNums: [0, 1] mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -1179,8 +1183,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -1203,17 +1207,21 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce 
Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1229,7 +1237,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION_txt_small POSTHOOK: type: QUERY POSTHOOK: Input: default@decimal_precision_txt_small #### A masked pattern was here #### -88499534.57586576220645 2743485571.8518386284 +88499534.575865762206451613 2743485571.8518386284 PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION_txt_small LIMIT 1 PREHOOK: type: QUERY PREHOOK: Input: default@decimal_precision_txt_small diff --git a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out index 5ac1ea8985..8a6135eada 100644 --- a/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby_reduce.q.out @@ -777,7 +777,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3, _col4 Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) + aggregations: sum(_col1), sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4) keys: 1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -805,7 +805,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct), _col4 (type: double), _col5 (type: struct), _col6 (type: decimal(38,18)), _col7 (type: struct) + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: decimal(38,18)), _col7 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -822,13 +822,13 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator 
Tree: Group By Operator - aggregations: sum(VALUE._col0), sum(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), sum(VALUE._col5), avg(VALUE._col6) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + expressions: 1 (type: int), _col1 (type: bigint), _col2 (type: bigint), (UDFToDouble(_col2) / _col3) (type: double), _col4 (type: double), (_col4 / _col5) (type: double), _col6 (type: decimal(38,18)), (_col6 / _col7) (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 125 Data size: 30150 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -975,13 +975,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 500 Data size: 120602 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4) + aggregations: sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4) keys: _col1 (type: int), _col0 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(38,18)), _col7 (type: decimal(38,18)) + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), (UDFToDouble(_col2) / _col3) (type: double), _col4 (type: double), (_col4 / _col5) (type: double), _col6 (type: decimal(38,18)), (_col6 / _col7) (type: decimal(38,18)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 250 Data size: 60301 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vectorization_1.q.out b/ql/src/test/results/clientpositive/vectorization_1.q.out index 97334a3fb2..bb8e483e6a 100644 --- a/ql/src/test/results/clientpositive/vectorization_1.q.out +++ b/ql/src/test/results/clientpositive/vectorization_1.q.out @@ -66,25 +66,26 @@ STAGE PLANS: predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (UDFToLong(cint) > cbigint) or (cbigint < UDFToLong(ctinyint)) or (cboolean1 < 0)) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, cint, cfloat, cdouble + expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5] + 
projectedOutputColumnNums: [0, 4, 2, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 0:tinyint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double, CastLongToDouble(col 0:tinyint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_samp, VectorUDAFCount(col 2:int) -> bigint + aggregators: VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFMaxLong(col 2:int) -> int, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -92,8 +93,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: tinyint), _col5 (type: int), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -109,24 +110,24 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true 
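[Editorial note on the rewrite shown in this hunk: the plan no longer ships a var_pop/var_samp intermediate struct from the map side; it ships sum(x*x), sum(x) and count(x) and lets the final Select Operator recombine them. A minimal Java sketch of that recombination, matching the power(((_colA - ((_colB * _colB) / _colN)) / _colN), 0.5) expressions in these plans — class and method names are illustrative, not Hive APIs:]

// Sketch only (not Hive code): recombine the partials the rewritten plans ship
// -- sum(x*x), sum(x), count(x) -- into the variance expressions the final
// Select Operator emits:
//   var_pop  = (sum(x^2) - sum(x)^2 / n) / n
//   var_samp = (sum(x^2) - sum(x)^2 / n) / (n - 1), null when n = 1
// stddev_pop / stddev_samp are power(var, 0.5) on top of these.
public final class VarianceRecombination {
  static double varPop(double sumSquares, double sum, long n) {
    return (sumSquares - (sum * sum) / n) / n;
  }

  static Double varSamp(double sumSquares, double sum, long n) {
    if (n == 1) {
      return null; // mirrors CASE WHEN ((n = 1L)) THEN (null) in the plans
    }
    return (sumSquares - (sum * sum) / n) / (n - 1);
  }

  public static void main(String[] args) {
    // x = {1, 2, 3}: sum = 6, sum of squares = 14, n = 3
    System.out.println(varPop(14.0, 6.0, 3));                 // 0.666...
    System.out.println(varSamp(14.0, 6.0, 3));                // 1.0
    System.out.println(Math.pow(varPop(14.0, 6.0, 3), 0.5));  // stddev_pop
  }
}

[End of note; the hunk continues below.]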
enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: var_pop(VALUE._col0), sum(VALUE._col1), max(VALUE._col2), max(VALUE._col3), var_samp(VALUE._col4), count(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), max(VALUE._col4), max(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), count(VALUE._col9) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 / -26.28D) (type: double), _col1 (type: double), (-1.389D + _col1) (type: double), (_col1 * (-1.389D + _col1)) (type: double), _col2 (type: tinyint), (- (_col1 * (-1.389D + _col1))) (type: double), _col3 (type: int), (CAST( _col3 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), _col4 (type: double), (10.175D % (- (_col1 * (-1.389D + _col1)))) (type: double), _col5 (type: bigint), (-563 % _col3) (type: int) + expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), (((_col0 - ((_col1 * _col1) / _col2)) / _col2) / -26.28D) (type: double), _col3 (type: double), (-1.389D + _col3) (type: double), (_col3 * (-1.389D + _col3)) (type: double), _col4 (type: tinyint), (- (_col3 * (-1.389D + _col3))) (type: double), _col5 (type: int), (CAST( _col5 AS decimal(10,0)) * 79.553) (type: decimal(16,3)), ((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END) (type: double), (10.175D % (- (_col3 * (-1.389D + _col3)))) (type: double), _col9 (type: bigint), (-563 % _col5) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,4 +183,4 @@ WHERE (((cdouble > ctinyint) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -1074.830257547229 -40.89917266161449 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620903E10 10.175 3745 -563 +1074.8302575472321 -40.899172661614614 -21997.674998402596 -21999.063998402595 4.839282601059194E8 62 -4.839282601059194E8 1073680599 85414512692.247 7.569848642620917E10 10.175 3745 -563 diff --git a/ql/src/test/results/clientpositive/vectorization_12.q.out b/ql/src/test/results/clientpositive/vectorization_12.q.out index ba0882f447..e1297300c0 100644 --- a/ql/src/test/results/clientpositive/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/vectorization_12.q.out @@ -89,26 +89,27 @@ STAGE PLANS: predicate: (((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ctimestamp1 is null) (type: 
boolean) Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean) - outputColumnNames: cbigint, cdouble, cstring1, cboolean1 + expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [3, 5, 6, 10] + projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 14] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 14:double Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble) + aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: - aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: stddev_samp, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop + aggregators: VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumLong(col 3:bigint) -> bigint, VectorUDAFSumDouble(col 14:double) -> double className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 5:double, col 3:bigint, col 6:string, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4] - keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + keys: _col3 (type: double), _col0 (type: bigint), _col2 (type: string), _col1 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) @@ -120,7 +121,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) + value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: 
bigint), _col10 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -136,20 +137,20 @@ STAGE PLANS: includeColumns: [0, 1, 3, 5, 6, 8, 10, 11] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), avg(VALUE._col2), sum(VALUE._col3), stddev_pop(VALUE._col4) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6) keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), _col6 (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), _col8 (type: double) + expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 Statistics: Num rows: 1877 Data size: 403561 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vectorization_13.q.out b/ql/src/test/results/clientpositive/vectorization_13.q.out index e41a0d7e52..96eda74911 100644 --- a/ql/src/test/results/clientpositive/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/vectorization_13.q.out @@ -91,26 +91,27 @@ STAGE PLANS: 
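[Editorial note on the avg rewrite visible in the hunks above: avg(x) becomes sum(x) / count(x), and for DECIMAL columns the division widens the result type (e.g. a decimal(30,10) sum dividing out to a decimal(38,18) average), which is why the vector_decimal_precision golden average above now prints 88499534.575865762206451613 rather than the shorter 88499534.57586576220645. A hedged sketch with BigDecimal — the HALF_UP rounding mode is an assumption, and the row count 31 is inferred from the printed sum and average, which are consistent with it at both the old scale 14 and the new scale 18:]

import java.math.BigDecimal;
import java.math.RoundingMode;

// Sketch only: avg(x) -> sum(x) / count(x) at the widened decimal scale.
public final class DecimalAvgSketch {
  static BigDecimal avg(BigDecimal sum, long count, int resultScale) {
    // Rounding mode assumed; Hive decimal division is believed to round half-up.
    return sum.divide(BigDecimal.valueOf(count), resultScale, RoundingMode.HALF_UP);
  }

  public static void main(String[] args) {
    // Sum taken from the golden output above; count inferred as noted.
    BigDecimal sum = new BigDecimal("2743485571.8518386284");
    System.out.println(avg(sum, 31, 18)); // 88499534.575865762206451613
    System.out.println(avg(sum, 31, 14)); // 88499534.57586576220645 (old scale)
  }
}

[End of note; the vectorization_13.q.out hunk continues below.]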
predicate: (((UDFToDouble(ctimestamp1) > 11.0D) and (UDFToDouble(ctimestamp2) <> 12.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output 
Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -122,7 +123,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -138,20 +139,20 @@ STAGE PLANS: includeColumns: [0, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(11,4)] + scratchColumnTypeNames: [double, decimal(11,4), double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), 
power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -420,26 +421,27 @@ STAGE PLANS: predicate: (((UDFToDouble(ctimestamp1) > -1.388D) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639)) or ((cfloat < 3569) and (cdouble <= 10.175D) and (cboolean1 <> 1))) (type: boolean) Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1 + expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 4, 6, 8, 10] + projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 13, 15, 18] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 0:tinyint) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double) -> 18:double Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint) + aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: - aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint + aggregators: VectorUDAFMaxLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFMinLong(col 0:tinyint) -> tinyint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 10:boolean, col 0:tinyint, col 
8:timestamp, col 4:float, col 6:string native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) @@ -451,7 +453,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) + value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: enabled: true @@ -468,13 +470,13 @@ STAGE PLANS: enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), sum(VALUE._col1), stddev_pop(VALUE._col2), stddev_pop(VALUE._col3), max(VALUE._col4), min(VALUE._col5) + aggregations: max(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint) + expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: 
float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vectorization_14.q.out b/ql/src/test/results/clientpositive/vectorization_14.q.out index 02a986c111..7a7a8170ac 100644 --- a/ql/src/test/results/clientpositive/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/vectorization_14.q.out @@ -91,27 +91,27 @@ STAGE PLANS: predicate: (((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and (UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint))) (type: boolean) Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [8, 4, 6, 10, 5, 14] - selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double + projectedOutputColumnNums: [8, 4, 6, 10, 5, 14, 13, 4, 15] + selectExpressions: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 15:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 13:double) -> 16:double) -> 13:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 15:double Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) + aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), 
count(_col1) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 14:double) -> struct aggregation: stddev_samp, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_pop, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: var_samp + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 14:double) -> bigint, VectorUDAFMaxDouble(col 4:float) -> float, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 6:string, col 4:float, col 5:double, col 8:timestamp, col 10:boolean native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) @@ -123,7 +123,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE - value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) + value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -139,20 +139,20 @@ STAGE PLANS: includeColumns: [0, 2, 3, 4, 5, 6, 8, 9, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, double] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), max(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6) keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, 
_col10 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175D) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vectorization_15.q.out b/ql/src/test/results/clientpositive/vectorization_15.q.out index a8d681b06e..dbef3e7d45 100644 --- a/ql/src/test/results/clientpositive/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/vectorization_15.q.out @@ -87,26 +87,27 @@ STAGE PLANS: predicate: (((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D)) or (cstring1 like '10%') or (cstring2 like '%ss%')) (type: boolean) Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean) - outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1 + expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) 
(type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 2, 4, 5, 6, 8, 10] + projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 15, 19] + selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 18:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 18:double) -> 19:double Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint) + aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: - aggregators: VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: var_pop, VectorUDAFVarLong(col 2:int) -> struct aggregation: var_samp, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 14:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 19:double) -> double, VectorUDAFSumDouble(col 15:double) -> double, VectorUDAFCount(col 2:int) -> bigint className: VectorGroupByOperator groupByMode: HASH keyExpressions: col 4:float, col 10:boolean, col 5:double, col 6:string, col 0:tinyint, col 2:int, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] - keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] + keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) @@ -118,7 +119,7 @@ STAGE PLANS: nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) + value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -134,19 +135,19 @@ STAGE PLANS: includeColumns: [0, 1, 2, 4, 5, 6, 7, 8, 10] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double, double, double, double, double, double, double] Reduce Vectorization: enabled: false enableConditionsNotMet: hive.vectorized.execution.reduce.enabled IS false, hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), min(VALUE._col1), stddev_samp(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), sum(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double) + expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * 
_col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vectorization_16.q.out b/ql/src/test/results/clientpositive/vectorization_16.q.out index 97c6ffbec9..571eae0f15 100644 --- a/ql/src/test/results/clientpositive/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/vectorization_16.q.out @@ -64,38 +64,39 @@ STAGE PLANS: predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean) Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) - outputColumnNames: cdouble, cstring1, ctimestamp1 + expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [5, 6, 8] + projectedOutputColumnNums: [6, 5, 8, 13] + selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble) + aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: - aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double + aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 5:double, col 6:string, col 8:timestamp + keyExpressions: col 6:string, col 5:double, col 8:timestamp native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2] - keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp) + projectedOutputColumnNums: [0, 1, 2, 3] + keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ - Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) + Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: 
timestamp) Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) + value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -111,20 +112,20 @@ STAGE PLANS: includeColumns: [5, 6, 7, 8] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2) - keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp) + aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double) + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/vectorization_2.q.out b/ql/src/test/results/clientpositive/vectorization_2.q.out index 
1df05af721..e3d6ad0cbc 100644 --- a/ql/src/test/results/clientpositive/vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/vectorization_2.q.out @@ -70,25 +70,26 @@ STAGE PLANS: predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double) - outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble + expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 3, 4, 5] + projectedOutputColumnNums: [1, 4, 3, 0, 5, 13, 16] + selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble) + aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: - aggregators: VectorUDAFAvgLong(col 1:smallint) -> struct, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFVarLong(col 3:bigint) -> struct aggregation: var_pop, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFAvgDouble(col 5:double) -> struct + aggregators: VectorUDAFSumLong(col 1:smallint) -> bigint, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 3:bigint) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -96,8 +97,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), 
_col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: bigint) Execution mode: vectorized Map Vectorization: enabled: true @@ -113,24 +114,24 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 7, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double] + scratchColumnTypeNames: [double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: avg(VALUE._col0), sum(VALUE._col1), var_pop(VALUE._col2), count(VALUE._col3), min(VALUE._col4), avg(VALUE._col5) + aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), count(VALUE._col6), min(VALUE._col7), sum(VALUE._col8), count(VALUE._col9) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 % -563.0D) (type: double), (_col0 + 762.0D) (type: double), _col1 (type: double), _col2 (type: double), (- _col2) (type: double), (_col1 - _col0) (type: double), _col3 (type: bigint), (- (_col1 - _col0)) (type: double), (_col2 - 762.0D) (type: double), _col4 (type: tinyint), ((- _col2) + UDFToDouble(_col4)) (type: double), _col5 (type: double), (((- _col2) + UDFToDouble(_col4)) - _col1) (type: double) + expressions: (_col0 / _col1) (type: double), ((_col0 / _col1) % -563.0D) (type: double), ((_col0 / _col1) + 762.0D) (type: double), _col2 (type: double), ((_col3 - ((_col4 * _col4) / _col5)) / _col5) (type: double), (- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) (type: double), (_col2 - (_col0 / _col1)) (type: double), _col6 (type: bigint), (- (_col2 - (_col0 / _col1))) (type: double), (((_col3 - ((_col4 * _col4) / _col5)) / _col5) - 762.0D) (type: double), _col7 (type: tinyint), ((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) (type: double), (_col8 / _col9) (type: double), (((- ((_col3 - ((_col4 * _col4) / _col5)) / _col5)) + UDFToDouble(_col7)) - _col2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -190,4 +191,4 @@ WHERE (((ctimestamp1 < ctimestamp2) POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### --5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378778E18 -1.49936299222378778E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378701E18 -64 -1.49936299222378778E18 -5650.1297631138395 -1.49936299222378496E18 +-5646.467075892857 -16.467075892856883 -4884.467075892857 -2839.634998679161 1.49936299222378906E18 -1.49936299222378906E18 2806.832077213696 3584 -2806.832077213696 1.49936299222378829E18 -64 -1.49936299222378906E18 -5650.1297631138395 -1.49936299222378624E18 diff --git a/ql/src/test/results/clientpositive/vectorization_3.q.out b/ql/src/test/results/clientpositive/vectorization_3.q.out index 0cee2546f6..bb6c0147ae 100644 --- a/ql/src/test/results/clientpositive/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/vectorization_3.q.out @@ -75,25 +75,26 @@ STAGE PLANS: predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean) Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float) - outputColumnNames: ctinyint, csmallint, cint, cfloat + expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 4] + projectedOutputColumnNums: [1, 0, 4, 2, 13, 18, 16, 20, 4, 17, 19, 23] + selectExpressions: CastLongToDouble(col 1:smallint) -> 13:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, CastLongToDouble(col 0:tinyint) -> 16:double, DoubleColMultiplyDoubleColumn(col 17:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 17:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 17:double, CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 21:double, col 22:double)(children: CastLongToDouble(col 2:int) -> 21:double, CastLongToDouble(col 2:int) -> 22:double) -> 23:double Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint) + aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), 
count(_col3), sum(_col11), sum(_col10) Group By Vectorization: - aggregators: VectorUDAFVarLong(col 1:smallint) -> struct aggregation: stddev_samp, VectorUDAFVarLong(col 0:tinyint) -> struct aggregation: stddev_pop, VectorUDAFVarDouble(col 4:float) -> struct aggregation: stddev_samp, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: stddev_pop + aggregators: VectorUDAFSumDouble(col 18:double) -> double, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 1:smallint) -> bigint, VectorUDAFSumDouble(col 20:double) -> double, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFCount(col 0:tinyint) -> bigint, VectorUDAFSumDouble(col 17:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:float) -> bigint, VectorUDAFSumDouble(col 4:float) -> double, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 23:double) -> double, VectorUDAFSumDouble(col 19:double) -> double className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5] + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Reduce Sink Vectorization: @@ -101,8 +102,8 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: double), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: double), _col13 (type: double) Execution mode: vectorized Map Vectorization: enabled: true @@ -118,24 +119,24 @@ STAGE PLANS: includeColumns: [0, 1, 2, 3, 4, 5, 8, 9] dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean partitionColumnCount: 0 - scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3)] + scratchColumnTypeNames: [double, decimal(22,3), decimal(8,3), double, double, double, double, double, double, double, double] Reduce Vectorization: enabled: false enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: Group By Operator - aggregations: stddev_samp(VALUE._col0), stddev_pop(VALUE._col1), 
stddev_samp(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5) + aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), count(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7), count(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), count(VALUE._col11), sum(VALUE._col12), sum(VALUE._col13) mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: double), (_col0 - 10.175D) (type: double), _col1 (type: double), (_col0 * (_col0 - 10.175D)) (type: double), (- _col1) (type: double), (_col0 % 79.553D) (type: double), (- (_col0 * (_col0 - 10.175D))) (type: double), _col2 (type: double), (- _col0) (type: double), _col3 (type: double), ((- (_col0 * (_col0 - 10.175D))) / (_col0 - 10.175D)) (type: double), (- (_col0 - 10.175D)) (type: double), _col4 (type: double), (-3728.0D - _col0) (type: double), _col5 (type: double), (_col4 / _col2) (type: double) + expressions: power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D) (type: double), power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- power(((_col3 - ((_col4 * _col4) / _col5)) / _col5), 0.5)) (type: double), (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) % 79.553D) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE ((_col8 - 1)) END), 0.5) (type: double), (- power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), _col9 (type: double), ((- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) * (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D))) / (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (- (power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) - 10.175D)) (type: double), (_col10 / _col11) (type: double), (-3728.0D - power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5)) (type: double), power(((_col12 - ((_col13 * _col13) / _col11)) / _col11), 0.5) (type: double), ((_col10 / _col11) / power(((_col6 - ((_col7 * _col7) / _col8)) / CASE WHEN ((_col8 = 1L)) THEN (null) ELSE 
((_col8 - 1)) END), 0.5)) (type: double)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
-           Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+           Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-             Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -200,4 +201,4 @@ WHERE (((cint <= cfloat)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.34690095515641 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL
+0.0 -10.175 34.287285216637066 -0.0 -34.287285216637066 0.0 0.0 34.3469009551564 -0.0 197.89499950408936 -0.0 10.175 NULL -3728.0 NULL NULL
diff --git a/ql/src/test/results/clientpositive/vectorization_4.q.out b/ql/src/test/results/clientpositive/vectorization_4.q.out
index 014750bd28..395431c2bc 100644
--- a/ql/src/test/results/clientpositive/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_4.q.out
@@ -70,17 +70,18 @@ STAGE PLANS:
                 predicate: (((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D)) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or (UDFToInteger(csmallint) >= cint)) (type: boolean)
                 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double)
-                  outputColumnNames: ctinyint, cint, cdouble
+                  expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [0, 2, 5]
+                      projectedOutputColumnNums: [2, 5, 0, 13]
+                      selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint)
+                    aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2)
                     Group By Vectorization:
-                        aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_pop, VectorUDAFAvgDouble(col 5:double) -> struct, VectorUDAFVarDouble(col 5:double) -> struct aggregation: var_pop, VectorUDAFMinLong(col 0:tinyint) -> tinyint
+                        aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint, VectorUDAFMinLong(col 0:tinyint) -> tinyint
                         className: VectorGroupByOperator
                         groupByMode: HASH
                         native: false
@@ -88,7 +89,7 @@ STAGE PLANS:
                         projectedOutputColumnNums: [0, 1, 2, 3, 4]
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       sort order:
                       Reduce Sink Vectorization:
@@ -96,8 +97,8 @@ STAGE PLANS:
                           native: false
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                      Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint)
+                      Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: tinyint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -113,24 +114,24 @@ STAGE PLANS:
           includeColumns: [0, 1, 2, 3, 5]
           dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
           partitionColumnCount: 0
-          scratchColumnTypeNames: []
+          scratchColumnTypeNames: [double]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: sum(VALUE._col0), stddev_pop(VALUE._col1), avg(VALUE._col2), var_pop(VALUE._col3), min(VALUE._col4)
+          aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), min(VALUE._col4)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3, _col4
-          Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), _col1 (type: double), (- _col1) (type: double), _col2 (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / _col2) (type: double), _col3 (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2)) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / _col2))) (type: double)
+            expressions: _col0 (type: bigint), (_col0 * -563L) (type: bigint), (-3728L + _col0) (type: bigint), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), (- power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5)) (type: double), (_col2 / _col3) (type: double), ((_col0 * -563L) % _col0) (type: bigint), (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double), (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3))) (type: double), ((-3728L + _col0) - (_col0 * -563L)) (type: bigint), _col4 (type: tinyint), _col4 (type: tinyint), (UDFToDouble(_col4) * (- (UDFToDouble(((_col0 * -563L) % _col0)) / (_col2 / _col3)))) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
-            Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 36 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -190,4 +191,4 @@ WHERE (((csmallint >= cint)
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--493101012745 277615870175435 -493101016473 136727.7868296355 -136727.7868296355 2298.5515807767374 0 0.0 1.8694487691330246E10 -0.0 -278108971191908 -64 -64 0.0
+-493101012745 277615870175435 -493101016473 136727.78682963562 -136727.78682963562 2298.5515807767374 0 0.0 1.8694487691330276E10 -0.0 -278108971191908 -64 -64 0.0
diff --git a/ql/src/test/results/clientpositive/vectorization_9.q.out b/ql/src/test/results/clientpositive/vectorization_9.q.out
index 97c6ffbec9..571eae0f15 100644
--- a/ql/src/test/results/clientpositive/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_9.q.out
@@ -64,38 +64,39 @@ STAGE PLANS:
                 predicate: (((cdouble >= -1.389D) or (cstring1 < 'a')) and (cstring2 like '%b%')) (type: boolean)
                 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
-                  outputColumnNames: cdouble, cstring1, ctimestamp1
+                  expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [5, 6, 8]
+                      projectedOutputColumnNums: [6, 5, 8, 13]
+                      selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
                   Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
+                    aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
                     Group By Vectorization:
-                        aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFVarDouble(col 5:double) -> struct aggregation: stddev_samp, VectorUDAFMinDouble(col 5:double) -> double
+                        aggregators: VectorUDAFCount(col 5:double) -> bigint, VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFMinDouble(col 5:double) -> double
                         className: VectorGroupByOperator
                         groupByMode: HASH
-                        keyExpressions: col 5:double, col 6:string, col 8:timestamp
+                        keyExpressions: col 6:string, col 5:double, col 8:timestamp
                         native: false
                         vectorProcessingMode: HASH
-                        projectedOutputColumnNums: [0, 1, 2]
-                    keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
+                        projectedOutputColumnNums: [0, 1, 2, 3]
+                    keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                     mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                     Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                      key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                       sort order: +++
-                      Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
                       Reduce Sink Vectorization:
                           className: VectorReduceSinkOperator
                           native: false
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                       Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
+                      value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -111,20 +112,20 @@ STAGE PLANS:
           includeColumns: [5, 6, 7, 8]
           dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
           partitionColumnCount: 0
-          scratchColumnTypeNames: []
+          scratchColumnTypeNames: [double]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), stddev_samp(VALUE._col1), min(VALUE._col2)
-          keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
+          aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), min(VALUE._col3)
+          keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
           Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
           Select Operator
-            expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639D) (type: double), (- (_col0 - 9763215.5639D)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639D / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
+            expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
             Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
             File Output Operator
diff --git a/ql/src/test/results/clientpositive/vectorization_limit.q.out b/ql/src/test/results/clientpositive/vectorization_limit.q.out
index a4ff11daa7..8226fd4adc 100644
--- a/ql/src/test/results/clientpositive/vectorization_limit.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_limit.q.out
@@ -222,18 +222,18 @@ STAGE PLANS:
                       selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
                   Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: avg(_col1)
+                    aggregations: sum(_col1), count(_col1)
                     Group By Vectorization:
-                        aggregators: VectorUDAFAvgDouble(col 13:double) -> struct
+                        aggregators: VectorUDAFSumDouble(col 13:double) -> double, VectorUDAFCount(col 13:double) -> bigint
                         className: VectorGroupByOperator
                         groupByMode: HASH
                         keyExpressions: col 0:tinyint
                         native: false
                         vectorProcessingMode: HASH
-                        projectedOutputColumnNums: [0]
+                        projectedOutputColumnNums: [0, 1]
                     keys: _col0 (type: tinyint)
                     mode: hash
-                    outputColumnNames: _col0, _col1
+                    outputColumnNames: _col0, _col1, _col2
                     Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       key expressions: _col0 (type: tinyint)
@@ -246,7 +246,7 @@ STAGE PLANS:
                           nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                       Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       TopN Hash Memory Usage: 0.3
-                      value expressions: _col1 (type: struct)
+                      value expressions: _col1 (type: double), _col2 (type: bigint)
       Execution mode: vectorized
       Map Vectorization:
          enabled: true
@@ -269,21 +269,25 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: avg(VALUE._col0)
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
           keys: KEY._col0 (type: tinyint)
           mode: mergepartial
-          outputColumnNames: _col0, _col1
+          outputColumnNames: _col0, _col1, _col2
           Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
-          Limit
-            Number of rows: 20
-            Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
+          Select Operator
+            expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
+            Limit
+              Number of rows: 20
              Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/vectorization_not.q.out b/ql/src/test/results/clientpositive/vectorization_not.q.out
index b5587ba4d0..cf92a6f694 100644
--- a/ql/src/test/results/clientpositive/vectorization_not.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_not.q.out
@@ -55,4 +55,4 @@ WHERE (((cstring2 LIKE '%b%')
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64
+-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.4363874554593627E9 3.875716535945533E8 0.0 2.0634715172019392E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0516820315185745E9 -2.0634715172019392E18 1.5020929380914048E17 -64 64
diff --git a/ql/src/test/results/clientpositive/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/vectorization_pushdown.q.out
index a13a0a08ce..d8a5b08e89 100644
--- a/ql/src/test/results/clientpositive/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/vectorization_pushdown.q.out
@@ -27,14 +27,14 @@ STAGE PLANS:
                 outputColumnNames: cbigint
                 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: avg(cbigint)
+                  aggregations: sum(cbigint), count(cbigint)
                   mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     sort order:
-                    Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: struct)
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint), _col1 (type: bigint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -51,17 +51,21 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: avg(VALUE._col0)
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
           mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: (_col0 / _col1) (type: double)
+            outputColumnNames: _col0
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
index dc7add7a61..07576fd421 100644
--- a/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_distinct_gby.q.out
@@ -146,30 +146,31 @@ STAGE PLANS:
                 native: true
                 vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]
             Select Operator
-              expressions: cint (type: int)
-              outputColumnNames: cint
+              expressions: cint (type: int), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
+              outputColumnNames: _col0, _col1, _col2
               Select Vectorization:
                   className: VectorSelectOperator
                   native: true
-                  projectedOutputColumnNums: [2]
+                  projectedOutputColumnNums: [2, 13, 16]
+                  selectExpressions: CastLongToDouble(col 2:int) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 2:int) -> 15:double) -> 16:double
              Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-                aggregations: sum(DISTINCT cint), count(DISTINCT cint), avg(DISTINCT cint), std(DISTINCT cint)
+                aggregations: sum(DISTINCT _col0), count(DISTINCT _col0), sum(DISTINCT _col2), sum(DISTINCT _col1)
                Group By Vectorization:
-                    aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFAvgLong(col 2:int) -> struct, VectorUDAFVarLong(col 2:int) -> struct aggregation: std
+                    aggregators: VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint, VectorUDAFSumDouble(col 16:double) -> double, VectorUDAFSumDouble(col 13:double) -> double
                    className: VectorGroupByOperator
                    groupByMode: HASH
-                    keyExpressions: col 2:int
+                    keyExpressions: col 2:int, col 16:double, col 13:double
                    native: false
                    vectorProcessingMode: HASH
                    projectedOutputColumnNums: [0, 1, 2, 3]
-                keys: cint (type: int)
+                keys: _col0 (type: int), _col2 (type: double), _col1 (type: double)
                mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
-                  key expressions: _col0 (type: int)
-                  sort order: +
+                  key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double)
+                  sort order: +++
                  Reduce Sink Vectorization:
                      className: VectorReduceSinkOperator
                      native: false
@@ -191,24 +192,28 @@ STAGE PLANS:
           includeColumns: [2]
           dataColumns: ctinyint:tinyint, csmallint:smallint, cint:int, cbigint:bigint, cfloat:float, cdouble:double, cstring1:string, cstring2:string, ctimestamp1:timestamp, ctimestamp2:timestamp, cboolean1:boolean, cboolean2:boolean
           partitionColumnCount: 0
-          scratchColumnTypeNames: []
+          scratchColumnTypeNames: [double, double, double, double]
       Reduce Vectorization:
           enabled: false
           enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), avg(DISTINCT KEY._col0:2._col0), std(DISTINCT KEY._col0:3._col0)
+          aggregations: sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), sum(DISTINCT KEY._col0:2._col0), sum(DISTINCT KEY._col0:3._col0)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint), _col1 (type: bigint), (UDFToDouble(_col0) / _col1) (type: double), power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator
@@ -224,4 +229,4 @@ POSTHOOK: query: select sum(distinct cint), count(distinct cint), avg(distinct c
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
--3482841611 6082 -572647.4204209142 6.153814687328991E8
+-3482841611 6082 -572647.4204209142 6.153814687328982E8
diff --git a/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
index a6fee455ae..d9c781ce83 100644
--- a/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_mapjoin.q.out
@@ -87,17 +87,17 @@ STAGE PLANS:
                       selectExpressions: LongColAddLongColumn(col 0:int, col 1:int) -> 2:int
                   Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
-                    aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
+                    aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2)
                     Group By Vectorization:
-                        aggregators: VectorUDAFCount(col 0:int) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 0:int) -> int, VectorUDAFAvgLong(col 2:int) -> struct
+                        aggregators: VectorUDAFCount(col 0:int) -> bigint, VectorUDAFMaxLong(col 1:int) -> int, VectorUDAFMinLong(col 0:int) -> int, VectorUDAFSumLong(col 2:int) -> bigint, VectorUDAFCount(col 2:int) -> bigint
                         className: VectorGroupByOperator
                         groupByMode: HASH
                         native: false
                         vectorProcessingMode: HASH
-                        projectedOutputColumnNums: [0, 1, 2, 3]
+                        projectedOutputColumnNums: [0, 1, 2, 3, 4]
                     mode: hash
-                    outputColumnNames: _col0, _col1, _col2, _col3
-                    Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
                       sort order:
                       Reduce Sink Vectorization:
@@ -105,8 +105,8 @@ STAGE PLANS:
                           native: false
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                      Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
+                      Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint)
       Execution mode: vectorized
       Map Vectorization:
           enabled: true
@@ -125,17 +125,21 @@ STAGE PLANS:
           enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
       Reduce Operator Tree:
         Group By Operator
-          aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+          aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), (_col3 / _col4) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
index 0dc582f77f..3b7de646f7 100644
--- a/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
@@ -344,26 +344,27 @@ STAGE PLANS:
             TableScan Vectorization:
                 native: true
             Select Operator
-              expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cdecimal (type: decimal(4,2))
-              outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1, cdecimal
+              expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), cdecimal (type: decimal(4,2)), (cdouble * cdouble) (type: double)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
              Select Vectorization:
                  className: VectorSelectOperator
                  native: true
-                  projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 10]
+                  projectedOutputColumnNums: [1, 0, 2, 5, 3, 4, 10, 12]
+                  selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double) -> 12:double
              Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal)
+                aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col7), sum(_col5), count(_col5), max(_col6)
                Group By Vectorization:
-                    aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCount(col 5:string) -> bigint, VectorUDAFAvgDouble(col 3:float) -> struct, VectorUDAFVarDouble(col 4:double) -> struct aggregation: stddev_pop, VectorUDAFMaxDecimal(col 10:decimal(4,2)) -> decimal(4,2)
+                    aggregators: VectorUDAFMaxLong(col 0:int) -> int, VectorUDAFMinLong(col 2:smallint) -> smallint, VectorUDAFCount(col 5:string) -> bigint, VectorUDAFSumDouble(col 3:float) -> double, VectorUDAFCount(col 3:float) -> bigint, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFSumDouble(col 4:double) -> double, VectorUDAFCount(col 4:double) -> bigint, VectorUDAFMaxDecimal(col 10:decimal(4,2)) -> decimal(4,2)
                    className: VectorGroupByOperator
                    groupByMode: HASH
                    keyExpressions: col 1:tinyint
                    native: false
                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
-                keys: ctinyint (type: tinyint)
+                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+                keys: _col0 (type: tinyint)
                mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
                Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: tinyint)
@@ -375,7 +376,7 @@ STAGE PLANS:
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                  Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2))
+                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(4,2))
       Execution mode: vectorized
       Map Vectorization:
          enabled: true
@@ -392,17 +393,21 @@ STAGE PLANS:
          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
-          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5)
+          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7), max(VALUE._col8)
          keys: KEY._col0 (type: tinyint)
          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
          Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          Select Operator
+            expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double), _col9 (type: decimal(4,2))
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
   Stage: Stage-2
     Map Reduce
@@ -480,7 +485,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
 1 121 1 8 1.1749999970197678 2.0621590627301285 90.33
-2 119 1 7 1.2142857142857142 1.8 60.12
+2 119 1 7 1.2142857142857142 1.8000000000000003 60.12
 3 120 1 7 1.171428578240531 1.7999999999999996 90.21
 PREHOOK: query: create table parquet_type_nodict like parquet_types stored as parquet tblproperties ("parquet.enable.dictionary"="false")
diff --git a/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
index 6edb69aaad..1fe360f5f8 100644
--- a/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_shufflejoin.q.out
@@ -71,10 +71,10 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
+                  aggregations: count(_col0), max(_col1), min(_col0), sum(_col2), count(_col2)
                   mode: hash
-                  outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     table:
@@ -95,8 +95,8 @@ STAGE PLANS:
                           native: false
                           nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                           nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                      Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-                      value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct)
+                      Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: bigint), _col4 (type: bigint)
       Execution mode: vectorized
       Map Vectorization:
          enabled: true
@@ -113,16 +113,20 @@ STAGE PLANS:
          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
-          aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+          aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), (_col3 / _col4) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
   Stage: Stage-3
     Map Reduce
@@ -138,7 +142,7 @@ STAGE PLANS:
                 native: false
                 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                 nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                 value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: double)
       Execution mode: vectorized
      Map Vectorization:
@@ -158,10 +162,10 @@ STAGE PLANS:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: double)
          outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
-            Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
            table:
              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
index fb0672c002..b0bfc8b28c 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp.q.out
@@ -316,17 +316,17 @@ STAGE PLANS:
                     projectedOutputColumnNums: [0]
                 Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: avg(ts)
+                  aggregations: sum(ts), count(ts)
                   Group By Vectorization:
-                      aggregators: VectorUDAFAvgTimestamp(col 0:timestamp) -> struct
+                      aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint
                      className: VectorGroupByOperator
                      groupByMode: HASH
                      native: false
                      vectorProcessingMode: HASH
-                      projectedOutputColumnNums: [0]
+                      projectedOutputColumnNums: [0, 1]
                  mode: hash
-                  outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    sort order:
                    Reduce Sink Vectorization:
@@ -334,8 +334,8 @@ STAGE PLANS:
                        native: false
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                        nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: struct)
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: double), _col1 (type: bigint)
      Execution mode: vectorized
      Map Vectorization:
          enabled: true
@@ -358,17 +358,17 @@ STAGE PLANS:
          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
-          aggregations: avg(VALUE._col0)
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
          mode: mergepartial
-          outputColumnNames: _col0
-          Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: _col0 (type: double), CAST( _col0 AS TIMESTAMP) (type: timestamp)
+            expressions: (_col0 / _col1) (type: double), CAST( (_col0 / _col1) AS TIMESTAMP) (type: timestamp)
            outputColumnNames: _col0, _col1
-            Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -414,25 +414,26 @@ STAGE PLANS:
                native: true
                vectorizationSchemaColumns: [0:ts:timestamp, 1:ROW__ID:struct]
            Select Operator
-              expressions: ts (type: timestamp)
-              outputColumnNames: ts
+              expressions: ts (type: timestamp), UDFToDouble(ts) (type: double), (UDFToDouble(ts) * UDFToDouble(ts)) (type: double)
+              outputColumnNames: _col0, _col1, _col2
              Select Vectorization:
                  className: VectorSelectOperator
                  native: true
-                  projectedOutputColumnNums: [0]
+                  projectedOutputColumnNums: [0, 2, 5]
+                  selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 2:double, DoubleColMultiplyDoubleColumn(col 3:double, col 4:double)(children: CastTimestampToDouble(col 0:timestamp) -> 3:double, CastTimestampToDouble(col 0:timestamp) -> 4:double) -> 5:double
              Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-                aggregations: variance(ts), var_pop(ts), var_samp(ts), std(ts), stddev(ts), stddev_pop(ts), stddev_samp(ts)
+                aggregations: sum(_col2), sum(_col1), count(_col0)
                Group By Vectorization:
-                    aggregators: VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: variance, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_samp, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: std, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_samp
+                    aggregators: VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCount(col 0:timestamp) -> bigint
                    className: VectorGroupByOperator
                    groupByMode: HASH
                    native: false
                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6]
+                    projectedOutputColumnNums: [0, 1, 2]
                mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
                  Reduce Sink Vectorization:
@@ -440,8 +441,8 @@ STAGE PLANS:
                      native: false
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                    Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint)
      Execution mode: vectorized
      Map Vectorization:
          enabled: true
@@ -457,24 +458,28 @@ STAGE PLANS:
          includeColumns: [0]
          dataColumns: ts:timestamp
          partitionColumnCount: 0
-          scratchColumnTypeNames: []
+          scratchColumnTypeNames: [double, double, double, double]
      Reduce Vectorization:
          enabled: false
          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
-          aggregations: variance(VALUE._col0), var_pop(VALUE._col1), var_samp(VALUE._col2), std(VALUE._col3), stddev(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6)
+          aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-          Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: NONE
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), ((_col0 - ((_col1 * _col1) / _col2)) / _col2) (type: double), ((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / _col2), 0.5) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / _col2), 0.5) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / _col2), 0.5) (type: double), power(((_col0 - ((_col1 * _col1) / _col2)) / CASE WHEN ((_col2 = 1L)) THEN (null) ELSE ((_col2 - 1)) END), 0.5) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-0
     Fetch Operator
diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 2ccff7b919..244aca6d00 100644
--- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -944,25 +944,26 @@ STAGE PLANS:
             TableScan Vectorization:
                 native: true
             Select Operator
-              expressions: ctimestamp1 (type: timestamp)
-              outputColumnNames: ctimestamp1
+              expressions: ctimestamp1 (type: timestamp), UDFToDouble(ctimestamp1) (type: double), (UDFToDouble(ctimestamp1) * UDFToDouble(ctimestamp1)) (type: double)
+              outputColumnNames: _col0, _col1, _col2
              Select Vectorization:
                  className: VectorSelectOperator
                  native: true
-                  projectedOutputColumnNums: [0]
+                  projectedOutputColumnNums: [0, 3, 6]
+                  selectExpressions: CastTimestampToDouble(col 0:timestamp) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastTimestampToDouble(col 0:timestamp) -> 4:double, CastTimestampToDouble(col 0:timestamp) -> 5:double) -> 6:double
              Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
-                aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+                aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
                Group By Vectorization:
-                    aggregators: VectorUDAFAvgTimestamp(col 0:timestamp) -> struct, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: variance, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: var_samp, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: std, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_pop, VectorUDAFVarTimestamp(col 0:timestamp) -> struct aggregation: stddev_samp
+                    aggregators: VectorUDAFSumTimestamp(col 0:timestamp) -> double, VectorUDAFCount(col 0:timestamp) -> bigint, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double
                    className: VectorGroupByOperator
                    groupByMode: HASH
                    native: false
                    vectorProcessingMode: HASH
-                    projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7]
+                    projectedOutputColumnNums: [0, 1, 2, 3]
                mode: hash
-                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  sort order:
                  Reduce Sink Vectorization:
@@ -970,8 +971,8 @@ STAGE PLANS:
                      native: false
                      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                      nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
-                    Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
+                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: double), _col3 (type: double)
      Execution mode: vectorized
      Map Vectorization:
          enabled: true
@@ -988,17 +989,17 @@ STAGE PLANS:
          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
      Reduce Operator Tree:
        Group By Operator
-          aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7)
+          aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-          Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), _col2 BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), _col3 BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double)
+            expressions: round((_col0 / _col1), 0) (type: double), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), ((_col2 - ((_col3 * _col3) / _col1)) / _col1) BETWEEN 8.97077295279421E19D AND 8.97077295279422E19D (type: boolean), ((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) BETWEEN 9.20684592523616E19D AND 9.20684592523617E19D (type: boolean), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / _col1), 0.5), 3) (type: double), round(power(((_col2 - ((_col3 * _col3) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5), 3) (type: double)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-            Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
-              Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE
+              Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE
              table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/view_cbo.q.out b/ql/src/test/results/clientpositive/view_cbo.q.out
index c740596e70..3983e5bbad 100644
--- a/ql/src/test/results/clientpositive/view_cbo.q.out
+++ b/ql/src/test/results/clientpositive/view_cbo.q.out
@@ -25,27 +25,27 @@ STAGE PLANS:
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
-                  aggregations: avg(_col2)
+                  aggregations: sum(_col2), count(_col2)
                   keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
                   mode: hash
-                  outputColumnNames: _col0, _col1, _col2, _col3
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
                   Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                     sort order: +++
                     Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
                     Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
-                    value expressions: _col3 (type: struct)
+                    value expressions: _col3 (type: double), _col4 (type: bigint)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: avg(VALUE._col0)
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
           keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
           mode: mergepartial
-          outputColumnNames: _col0, _col1, _col3
+          outputColumnNames: _col0, _col1, _col3, _col4
           Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
           pruneGroupingSetId: true
           Select Operator
-            expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+            expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double)
             outputColumnNames: _col0, _col1, _col2
             Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
             File Output Operator