diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java
new file mode 100644
index 0000000000..9298e51ebf
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+/**
+ * Aggregation function to represent: stddev_pop, stddev_samp, var_pop, var_samp.
+ */
+public class HiveSqlVarianceAggFunction extends SqlAggFunction {
+
+ public HiveSqlVarianceAggFunction(String name, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
+ super(name, null, kind, returnTypeInference, operandTypeInference,
+ operandTypeChecker, SqlFunctionCategory.NUMERIC, false, false);
+ assert kind == SqlKind.STDDEV_POP || kind == SqlKind.STDDEV_SAMP ||
+ kind == SqlKind.VAR_POP || kind == SqlKind.VAR_SAMP;
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
new file mode 100644
index 0000000000..fb65ce180c
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
@@ -0,0 +1,535 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.tools.RelBuilderFactory;
+import org.apache.calcite.util.CompositeList;
+import org.apache.calcite.util.ImmutableIntList;
+import org.apache.calcite.util.Util;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This rule is a copy of {@link org.apache.calcite.rel.rules.AggregateReduceFunctionsRule}
+ * that regenerates Hive-specific aggregate operators.
+ *
+ * TODO: When CALCITE-2216 is completed, we should be able to remove much of this code and
+ * just override the relevant methods.
+ *
+ * Planner rule that reduces aggregate functions in
+ * {@link org.apache.calcite.rel.core.Aggregate}s to simpler forms.
+ *
+ * <p>Rewrites:
+ * <ul>
+ * <li>AVG(x) -> SUM(x) / COUNT(x)</li>
+ * <li>STDDEV_POP(x) -> SQRT((SUM(x * x) - SUM(x) * SUM(x) / COUNT(x)) / COUNT(x))</li>
+ * <li>STDDEV_SAMP(x), VAR_POP(x) and VAR_SAMP(x) are rewritten analogously</li>
+ * </ul>
+ */
+public class HiveAggregateReduceFunctionsRule extends RelOptRule {
+
+  /** The singleton. */
+  public static final HiveAggregateReduceFunctionsRule INSTANCE =
+      new HiveAggregateReduceFunctionsRule(HiveAggregate.class,
+          HiveRelFactories.HIVE_BUILDER);
+
+  /** Creates a HiveAggregateReduceFunctionsRule. */
+  public HiveAggregateReduceFunctionsRule(Class<? extends Aggregate> aggregateClass,
+      RelBuilderFactory relBuilderFactory) {
+    super(operand(aggregateClass, any()), relBuilderFactory, null);
+  }
+
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    final Aggregate oldAggRel = (Aggregate) call.rels[0];
+    return containsAvgStddevVarCall(oldAggRel.getAggCallList());
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall ruleCall) {
+    final Aggregate oldAggRel = (Aggregate) ruleCall.rels[0];
+    reduceAggs(ruleCall, oldAggRel);
+  }
+
+  /**
+   * Returns whether any of the aggregate calls is a reducible function
+   * (AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP or VAR_SAMP).
+   */
+  private boolean containsAvgStddevVarCall(List<AggregateCall> aggCallList) {
+ for (AggregateCall call : aggCallList) {
+ if (isReducible(call.getAggregation().getKind())) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+   * Returns whether an aggregate function of the given kind can be reduced.
+ */
+ private boolean isReducible(final SqlKind kind) {
+ if (SqlKind.AVG_AGG_FUNCTIONS.contains(kind)) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+   * Reduces all calls to AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in
+   * the aggregates list to simpler forms based on SUM and COUNT.
+   *
+   * <p>It handles newly generated common subexpressions, since this rewrite was
+   * previously done at the sql2rel stage.
+ */
+ private void reduceAggs(
+ RelOptRuleCall ruleCall,
+ Aggregate oldAggRel) {
+ RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+
+    List<AggregateCall> oldCalls = oldAggRel.getAggCallList();
+ final int groupCount = oldAggRel.getGroupCount();
+ final int indicatorCount = oldAggRel.getIndicatorCount();
+
+    final List<AggregateCall> newCalls = Lists.newArrayList();
+    final Map<AggregateCall, RexNode> aggCallMapping = Maps.newHashMap();
+
+    final List<RexNode> projList = Lists.newArrayList();
+
+ // pass through group key (+ indicators if present)
+ for (int i = 0; i < groupCount + indicatorCount; ++i) {
+ projList.add(
+ rexBuilder.makeInputRef(
+ getFieldType(oldAggRel, i),
+ i));
+ }
+
+ // List of input expressions. If a particular aggregate needs more, it
+ // will add an expression to the end, and we will create an extra
+ // project.
+ final RelBuilder relBuilder = ruleCall.builder();
+ relBuilder.push(oldAggRel.getInput());
+    final List<RexNode> inputExprs = new ArrayList<>(relBuilder.fields());
+
+ // create new agg function calls and rest of project list together
+ for (AggregateCall oldCall : oldCalls) {
+ projList.add(
+ reduceAgg(
+ oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs));
+ }
+
+ final int extraArgCount =
+ inputExprs.size() - relBuilder.peek().getRowType().getFieldCount();
+ if (extraArgCount > 0) {
+ relBuilder.project(inputExprs,
+ CompositeList.of(
+ relBuilder.peek().getRowType().getFieldNames(),
+ Collections.nCopies(extraArgCount, null)));
+ }
+ newAggregateRel(relBuilder, oldAggRel, newCalls);
+ relBuilder.project(projList, oldAggRel.getRowType().getFieldNames())
+ .convert(oldAggRel.getRowType(), false);
+ ruleCall.transformTo(relBuilder.build());
+ }
+
+ private RexNode reduceAgg(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+      List<AggregateCall> newCalls,
+      Map<AggregateCall, RexNode> aggCallMapping,
+      List<RexNode> inputExprs) {
+ final SqlKind kind = oldCall.getAggregation().getKind();
+ if (isReducible(kind)) {
+ switch (kind) {
+ case AVG:
+ // replace original AVG(x) with SUM(x) / COUNT(x)
+ return reduceAvg(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs);
+ case STDDEV_POP:
+ // replace original STDDEV_POP(x) with
+ // SQRT(
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / COUNT(x))
+ return reduceStddev(oldAggRel, oldCall, true, true, newCalls,
+ aggCallMapping, inputExprs);
+ case STDDEV_SAMP:
+ // replace original STDDEV_SAMP(x) with
+ // SQRT(
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END)
+ return reduceStddev(oldAggRel, oldCall, false, true, newCalls,
+ aggCallMapping, inputExprs);
+ case VAR_POP:
+ // replace original VAR_POP(x) with
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / COUNT(x)
+ return reduceStddev(oldAggRel, oldCall, true, false, newCalls,
+ aggCallMapping, inputExprs);
+ case VAR_SAMP:
+ // replace original VAR_SAMP(x) with
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END
+ return reduceStddev(oldAggRel, oldCall, false, false, newCalls,
+ aggCallMapping, inputExprs);
+ default:
+ throw Util.unexpected(kind);
+ }
+ } else {
+ // anything else: preserve original call
+ RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+ final int nGroups = oldAggRel.getGroupCount();
+      List<RelDataType> oldArgTypes =
+ SqlTypeUtil.projectTypes(
+ oldAggRel.getInput().getRowType(), oldCall.getArgList());
+ return rexBuilder.addAggCall(oldCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ oldArgTypes);
+ }
+ }
+
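+  /**
+   * Creates an aggregate call whose return type is inferred by binding the
+   * given operand type, used for calls such as SUM(x * x) whose argument is a
+   * newly projected expression rather than an existing input column.
+   */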
+ private AggregateCall createAggregateCallWithBinding(
+ RelDataTypeFactory typeFactory,
+ SqlAggFunction aggFunction,
+ RelDataType operandType,
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ int argOrdinal) {
+ final Aggregate.AggCallBinding binding =
+ new Aggregate.AggCallBinding(typeFactory, aggFunction,
+ ImmutableList.of(operandType), oldAggRel.getGroupCount(),
+ oldCall.filterArg >= 0);
+ return AggregateCall.create(aggFunction,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ ImmutableIntList.of(argOrdinal),
+ oldCall.filterArg,
+ aggFunction.inferReturnType(binding),
+ null);
+ }
+
+ private RexNode reduceAvg(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+      List<AggregateCall> newCalls,
+      Map<AggregateCall, RexNode> aggCallMapping,
+      List<RexNode> inputExprs) {
+ final int nGroups = oldAggRel.getGroupCount();
+ final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+ final RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
+ final int iAvgInput = oldCall.getArgList().get(0);
+ RelDataType avgInputType = typeFactory.createTypeWithNullability(
+ getFieldType(oldAggRel.getInput(), iAvgInput), true);
+ final AggregateCall sumCall =
+ AggregateCall.create(
+ new HiveSqlSumAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ null,
+ null);
+ RelDataType countRetType = typeFactory.createTypeWithNullability(
+ typeFactory.createSqlType(SqlTypeName.BIGINT), true);
+ final AggregateCall countCall =
+ AggregateCall.create(
+ new HiveSqlCountAggFunction(
+ oldCall.isDistinct(),
+ ReturnTypes.explicit(countRetType),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ countRetType,
+ null);
+
+ // NOTE: these references are with respect to the output
+ // of newAggRel
+ RexNode numeratorRef =
+ rexBuilder.addAggCall(sumCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(avgInputType));
+ final RexNode denominatorRef =
+ rexBuilder.addAggCall(countCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(avgInputType));
+
+ numeratorRef = rexBuilder.ensureType(oldCall.getType(), numeratorRef, true);
+ final RexNode divideRef =
+ rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, numeratorRef, denominatorRef);
+ return rexBuilder.makeCast(oldCall.getType(), divideRef);
+ }
+
+ private RexNode reduceStddev(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ boolean biased,
+ boolean sqrt,
+      List<AggregateCall> newCalls,
+      Map<AggregateCall, RexNode> aggCallMapping,
+      List<RexNode> inputExprs) {
+ // stddev_pop(x) ==>
+ // power(
+ // (sum(x * x) - sum(x) * sum(x) / count(x))
+ // / count(x),
+ // .5)
+ //
+ // stddev_samp(x) ==>
+ // power(
+ // (sum(x * x) - sum(x) * sum(x) / count(x))
+ // / nullif(count(x) - 1, 0),
+ // .5)
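+    //
+    // This one-pass form only needs SUM(x), SUM(x * x) and COUNT(x), so the
+    // reduced plan can be computed with ordinary partial aggregations instead
+    // of a dedicated variance evaluator.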
+ final int nGroups = oldAggRel.getGroupCount();
+ final RelOptCluster cluster = oldAggRel.getCluster();
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
+
+ assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
+ final int argOrdinal = oldCall.getArgList().get(0);
+ final RelDataType argOrdinalType = getFieldType(oldAggRel.getInput(), argOrdinal);
+ final RelDataType oldCallType =
+ typeFactory.createTypeWithNullability(oldCall.getType(), true);
+
+ final RexNode argRef =
+ rexBuilder.ensureType(oldCallType, inputExprs.get(argOrdinal), false);
+ final int argRefOrdinal = lookupOrAdd(inputExprs, argRef);
+
+ final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY,
+ argRef, argRef);
+ final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);
+
+ final AggregateCall sumArgSquaredAggCall =
+ createAggregateCallWithBinding(typeFactory,
+ new HiveSqlSumAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
+ argSquared.getType(), oldAggRel, oldCall, argSquaredOrdinal);
+
+ final RexNode sumArgSquared =
+ rexBuilder.addAggCall(sumArgSquaredAggCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(sumArgSquaredAggCall.getType()));
+
+ final AggregateCall sumArgAggCall =
+ AggregateCall.create(
+ new HiveSqlSumAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ ImmutableIntList.of(argRefOrdinal),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ null,
+ null);
+
+ final RexNode sumArg =
+ rexBuilder.addAggCall(sumArgAggCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(sumArgAggCall.getType()));
+ final RexNode sumArgCast = rexBuilder.ensureType(oldCallType, sumArg, true);
+ final RexNode sumSquaredArg =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.MULTIPLY, sumArgCast, sumArgCast);
+
+    RelDataType countRetType = typeFactory.createTypeWithNullability(
+        typeFactory.createSqlType(SqlTypeName.BIGINT), true);
+ final AggregateCall countArgAggCall =
+ AggregateCall.create(
+ new HiveSqlCountAggFunction(
+ oldCall.isDistinct(),
+ ReturnTypes.explicit(countRetType),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ countRetType,
+ null);
+
+ final RexNode countArg =
+ rexBuilder.addAggCall(countArgAggCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(argOrdinalType));
+
+ final RexNode avgSumSquaredArg =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg);
+
+ final RexNode diff =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.MINUS,
+ sumArgSquared, avgSumSquaredArg);
+
+ final RexNode denominator;
+ if (biased) {
+ denominator = countArg;
+ } else {
+ final RexLiteral one =
+ rexBuilder.makeExactLiteral(BigDecimal.ONE);
+ final RexNode nul =
+ rexBuilder.makeCast(countArg.getType(), rexBuilder.constantNull());
+ final RexNode countMinusOne =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.MINUS, countArg, one);
+ final RexNode countEqOne =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.EQUALS, countArg, one);
+ denominator =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.CASE,
+ countEqOne, nul, countMinusOne);
+ }
+
+ final RexNode div =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.DIVIDE, diff, denominator);
+
+ RexNode result = div;
+ if (sqrt) {
+ final RexNode half =
+ rexBuilder.makeExactLiteral(new BigDecimal("0.5"));
+ result =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.POWER, div, half);
+ }
+
+ return rexBuilder.makeCast(
+ oldCall.getType(), result);
+ }
+
+ /**
+ * Finds the ordinal of an element in a list, or adds it.
+ *
+ * @param list List
+ * @param element Element to lookup or add
+ * @return Ordinal of element in list
+ */
+  private static int lookupOrAdd(List<RexNode> list, RexNode element) {
+    for (int ordinal = 0; ordinal < list.size(); ordinal++) {
+      // Compare digests (toString) rather than relying on object identity.
+      if (list.get(ordinal).toString().equals(element.toString())) {
+ return ordinal;
+ }
+ }
+ list.add(element);
+ return list.size() - 1;
+ }
+
+ /**
+ * Do a shallow clone of oldAggRel and update aggCalls. Could be refactored
+ * into Aggregate and subclasses - but it's only needed for some
+ * subclasses.
+ *
+ * @param relBuilder Builder of relational expressions; at the top of its
+ * stack is its input
+ * @param oldAggregate LogicalAggregate to clone.
+ * @param newCalls New list of AggregateCalls
+ */
+ protected void newAggregateRel(RelBuilder relBuilder,
+      Aggregate oldAggregate, List<AggregateCall> newCalls) {
+ relBuilder.aggregate(
+ relBuilder.groupKey(oldAggregate.getGroupSet(),
+ oldAggregate.getGroupSets()),
+ newCalls);
+ }
+
+ private RelDataType getFieldType(RelNode relNode, int i) {
+ final RelDataTypeField inputField =
+ relNode.getRowType().getFieldList().get(i);
+ return inputField.getType();
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index cb0c2b1b35..950abe16f4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -49,6 +49,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlVarianceAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
@@ -577,48 +578,82 @@ public static SqlAggFunction getCalciteAggFn(String hiveUdfName, boolean isDisti
CalciteUDFInfo udfInfo = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType);
switch (hiveUdfName.toLowerCase()) {
- case "sum":
- calciteAggFn = new HiveSqlSumAggFunction(
- isDistinct,
- udfInfo.returnTypeInference,
- udfInfo.operandTypeInference,
- udfInfo.operandTypeChecker);
- break;
- case "count":
- calciteAggFn = new HiveSqlCountAggFunction(
- isDistinct,
- udfInfo.returnTypeInference,
- udfInfo.operandTypeInference,
- udfInfo.operandTypeChecker);
- break;
- case "min":
- calciteAggFn = new HiveSqlMinMaxAggFunction(
- udfInfo.returnTypeInference,
- udfInfo.operandTypeInference,
- udfInfo.operandTypeChecker, true);
- break;
- case "max":
- calciteAggFn = new HiveSqlMinMaxAggFunction(
- udfInfo.returnTypeInference,
- udfInfo.operandTypeInference,
- udfInfo.operandTypeChecker, false);
- break;
- case "avg":
- calciteAggFn = new HiveSqlAverageAggFunction(
- udfInfo.returnTypeInference,
- udfInfo.operandTypeInference,
- udfInfo.operandTypeChecker);
+ case "sum":
+ calciteAggFn = new HiveSqlSumAggFunction(
+ isDistinct,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "count":
+ calciteAggFn = new HiveSqlCountAggFunction(
+ isDistinct,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "min":
+ calciteAggFn = new HiveSqlMinMaxAggFunction(
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker, true);
+ break;
+ case "max":
+ calciteAggFn = new HiveSqlMinMaxAggFunction(
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker, false);
+ break;
+ case "avg":
+ calciteAggFn = new HiveSqlAverageAggFunction(
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
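+      // "std" and "stddev" are Hive aliases for stddev_pop, and "variance" is
+      // an alias for var_pop, so all of them map onto the population kinds.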
+ case "std":
+ case "stddev":
+ case "stddev_pop":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "stddev_pop",
+ SqlKind.STDDEV_POP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "stddev_samp":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "stddev_samp",
+ SqlKind.STDDEV_SAMP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "variance":
+ case "var_pop":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "var_pop",
+ SqlKind.VAR_POP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "var_samp":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "var_samp",
+ SqlKind.VAR_SAMP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ default:
+ calciteAggFn = new CalciteUDAF(
+ isDistinct,
+ udfInfo.udfName,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
break;
- default:
- calciteAggFn = new CalciteUDAF(
- isDistinct,
- udfInfo.udfName,
- udfInfo.returnTypeInference,
- udfInfo.operandTypeInference,
- udfInfo.operandTypeChecker);
- break;
}
-
}
return calciteAggFn;
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 3520d90fa8..d90dde992b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -177,6 +177,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceFunctionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule;
@@ -1821,6 +1822,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
+ rules.add(HiveAggregateReduceFunctionsRule.INSTANCE);
rules.add(HiveAggregateReduceRule.INSTANCE);
if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses));
@@ -1839,7 +1841,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
rules.toArray(new RelOptRule[rules.size()]));
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
-// it is happening at 1762
+
// 4. Push down limit through outer join
// NOTE: We run this after PPD to support old style join syntax.
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
index 01c933c4a1..a0072f7e8e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
@@ -41,16 +41,30 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
+ parameters[0].getTypeName() + " is passed.");
}
switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
case LONG:
- return new SumZeroIfEmpty();
+ return new SumLongZeroIfEmpty();
+ case TIMESTAMP:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ case VARCHAR:
+ case CHAR:
+ return new SumDoubleZeroIfEmpty();
+ case DECIMAL:
+ return new SumHiveDecimalZeroIfEmpty();
+ case BOOLEAN:
+ case DATE:
default:
throw new UDFArgumentTypeException(0,
- "Only bigint type arguments are accepted but "
+ "Only numeric or string type arguments are accepted but "
+ parameters[0].getTypeName() + " is passed.");
}
}
- public static class SumZeroIfEmpty extends GenericUDAFSumLong {
+ public static class SumLongZeroIfEmpty extends GenericUDAFSumLong {
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
@@ -59,5 +73,24 @@ public Object terminate(AggregationBuffer agg) throws HiveException {
return result;
}
}
-}
+ public static class SumDoubleZeroIfEmpty extends GenericUDAFSumDouble {
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+
+ public static class SumHiveDecimalZeroIfEmpty extends GenericUDAFSumHiveDecimal {
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+}
diff --git a/ql/src/test/queries/clientpositive/groupby3.q b/ql/src/test/queries/clientpositive/groupby3.q
index d709d9b5ad..284c2a8826 100755
--- a/ql/src/test/queries/clientpositive/groupby3.q
+++ b/ql/src/test/queries/clientpositive/groupby3.q
@@ -1,3 +1,4 @@
+set hive.cbo.enable=false;
set hive.mapred.mode=nonstrict;
set hive.explain.user=false;
set hive.map.aggr=false;
diff --git a/ql/src/test/queries/clientpositive/groupby3_map_skew.q b/ql/src/test/queries/clientpositive/groupby3_map_skew.q
index f9cb46ee32..8b18d11186 100644
--- a/ql/src/test/queries/clientpositive/groupby3_map_skew.q
+++ b/ql/src/test/queries/clientpositive/groupby3_map_skew.q
@@ -1,3 +1,4 @@
+set hive.cbo.enable=false;
set hive.mapred.mode=nonstrict;
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
diff --git a/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q
new file mode 100644
index 0000000000..ae4733f705
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q
@@ -0,0 +1,60 @@
+set hive.cbo.enable=false;
+
+drop table t;
+create table t (id int,px int,y decimal,x decimal);
+
+insert into t values (101,1,1,1);
+insert into t values (201,2,1,1);
+insert into t values (301,3,1,1);
+insert into t values (401,4,1,11);
+insert into t values (501,5,1,null);
+insert into t values (601,6,null,1);
+insert into t values (701,6,null,null);
+insert into t values (102,1,2,2);
+insert into t values (202,2,1,2);
+insert into t values (302,3,2,1);
+insert into t values (402,4,2,12);
+insert into t values (502,5,2,null);
+insert into t values (602,6,null,2);
+insert into t values (702,6,null,null);
+insert into t values (103,1,3,3);
+insert into t values (203,2,1,3);
+insert into t values (303,3,3,1);
+insert into t values (403,4,3,13);
+insert into t values (503,5,3,null);
+insert into t values (603,6,null,3);
+insert into t values (703,6,null,null);
+insert into t values (104,1,4,4);
+insert into t values (204,2,1,4);
+insert into t values (304,3,4,1);
+insert into t values (404,4,4,14);
+insert into t values (504,5,4,null);
+insert into t values (604,6,null,4);
+insert into t values (704,6,null,null);
+insert into t values (800,7,1,1);
+
+
+explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px;
+
+select px,
+ round( var_pop(x),5),
+ round( var_pop(y),5),
+ round( corr(y,x),5),
+ round( covar_samp(y,x),5),
+ round( covar_pop(y,x),5),
+ regr_count(y,x),
+ round( regr_slope(y,x),5),
+ round( regr_intercept(y,x),5),
+ round( regr_r2(y,x),5),
+ round( regr_sxx(y,x),5),
+ round( regr_syy(y,x),5),
+ round( regr_sxy(y,x),5),
+ round( regr_avgx(y,x),5),
+ round( regr_avgy(y,x),5),
+ round( regr_count(y,x),5)
+ from t group by px order by px;
+
+
+select id,regr_count(y,x) over (partition by px) from t order by id;
diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out
index bece89f0d5..58e6f4684e 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out
@@ -49,26 +49,27 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: $f0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: $f0, $f00, $f2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: $f0 (type: string)
sort order: +
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: $f2 (type: double), $f00 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0)
+ aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1)
mode: complete
- outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7, $f8, $f9, $f10
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: $f0, $f1, $f2, $f3, $f4, $f5, $f6, $f7
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: $f0 (type: double), $f1 (type: double), $f2 (type: double), UDFToDouble($f3) (type: double), UDFToDouble($f4) (type: double), $f5 (type: double), $f6 (type: double), $f7 (type: double), $f8 (type: double), $f9 (type: double), UDFToDouble($f10) (type: double)
+ expressions: $f0 (type: double), ($f0 / $f1) (type: double), ($f2 / $f3) (type: double), UDFToDouble($f4) (type: double), UDFToDouble($f5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / $f1), 0.5) (type: double), power((($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END), 0.5) (type: double), (($f6 - (($f7 * $f7) / $f1)) / $f1) (type: double), (($f6 - (($f7 * $f7) / $f1)) / CASE WHEN (($f1 = 1)) THEN (null) ELSE (($f1 - 1)) END) (type: double), $f2 (type: double), UDFToDouble($f3) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -77,19 +78,19 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double)
outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll')
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5256 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5256 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 5280 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 5256 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -146,10 +147,10 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ]
POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -164,4 +165,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 79136.0 309.0
+130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0
diff --git a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out
index d6ff5b75cf..ee22ba4592 100644
--- a/ql/src/test/results/clientpositive/count_dist_rewrite.q.out
+++ b/ql/src/test/results/clientpositive/count_dist_rewrite.q.out
@@ -280,29 +280,29 @@ STAGE PLANS:
outputColumnNames: key
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(key), min(key), avg(key)
+ aggregations: max(key), min(key), sum(key), count(key)
keys: key (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: struct)
+ value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: double), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), min(VALUE._col1), avg(VALUE._col2)
+ aggregations: max(VALUE._col0), min(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(_col1), count(_col0), min(_col2), avg(_col3)
+ aggregations: max(_col1), count(_col0), min(_col2), sum(_col3), count(_col4)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -316,21 +316,25 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: struct)
+ Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+ aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 1148 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), (_col3 / _col4) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 752 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -564,33 +568,33 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(), stddev(key)
- keys: key (type: string)
+ aggregations: count(), sum(_col2), sum(_col1), count(_col0)
+ keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col3 (type: struct)
+ value expressions: _col1 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), stddev(VALUE._col1)
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: partial2
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(_col1), count(_col0), stddev(_col2)
+ aggregations: count(_col1), count(_col0), sum(_col2), sum(_col3), count(_col4)
mode: partial2
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -604,21 +608,21 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct)
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), count(VALUE._col1), stddev(VALUE._col2)
+ aggregations: count(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(_col2) (type: int)
+ expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: int)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 48 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -683,33 +687,33 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col0), avg(_col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0)
+ aggregations: sum(_col0), count(_col0), max(_col0), min(_col0), sum(_col2), sum(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), min(VALUE._col3), std(VALUE._col4), stddev_samp(VALUE._col5), variance(VALUE._col6), var_samp(VALUE._col7)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), max(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5)
keys: KEY._col0 (type: string)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col1), avg(_col2), count(_col0), max(_col3), min(_col4), std(_col5), stddev_samp(_col6), variance(_col7), var_samp(_col8)
+ aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -723,21 +727,21 @@ STAGE PLANS:
TableScan
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
+ Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(_col5) (type: int), UDFToInteger(_col6) (type: int), UDFToInteger(_col7) (type: int), UDFToInteger(_col8) (type: int)
+ expressions: _col0 (type: double), (_col0 / _col1) (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5)) (type: int), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / _col1)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1392 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 592 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -796,26 +800,26 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(key), count(DISTINCT key), min(key), avg(key)
+ aggregations: max(key), count(DISTINCT key), min(key), sum(key), count(key)
keys: value (type: string), key (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: struct)
+ value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), avg(VALUE._col3)
+ aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: double)
+ expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), (_col4 / _col5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/decimal_precision.q.out b/ql/src/test/results/clientpositive/decimal_precision.q.out
index 3f28106685..2ac3190bc8 100644
--- a/ql/src/test/results/clientpositive/decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/decimal_precision.q.out
@@ -542,27 +542,31 @@ STAGE PLANS:
outputColumnNames: dec
Statistics: Num rows: 1 Data size: 26610 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(dec), sum(dec)
+ aggregations: sum(dec), count(dec)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -578,7 +582,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_precision
#### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@decimal_precision
diff --git a/ql/src/test/results/clientpositive/decimal_udf.q.out b/ql/src/test/results/clientpositive/decimal_udf.q.out
index e451a186fc..e6df9fbda1 100644
--- a/ql/src/test/results/clientpositive/decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/decimal_udf.q.out
@@ -1282,26 +1282,26 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(key), count(key), avg(key)
+ aggregations: sum(key), count(key)
keys: value (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct)
+ value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10))
+ expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), (CAST( _col1 AS decimal(24,14)) / _col2) (type: decimal(38,28)), _col1 (type: decimal(30,10))
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1319,10 +1319,10 @@ STAGE PLANS:
key expressions: _col0 (type: int)
sort order: +
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10))
+ value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(38,28)), _col3 (type: decimal(30,10))
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10))
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(38,28)), VALUE._col2 (type: decimal(30,10))
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1347,23 +1347,23 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_udf
#### A masked pattern was here ####
--1234567890 -1234567890.123456789000000000 -1234567890.12345678900000 -1234567890.1234567890
--1255 -1255.490000000000000000 -1255.49000000000000 -1255.4900000000
--11 -1.122000000000000000 -1.12200000000000 -1.1220000000
--1 -1.120000000000000000 -1.12000000000000 -2.2400000000
-0 0.025384615384615385 0.02538461538462 0.3300000000
-1 1.048400000000000000 1.04840000000000 5.2420000000
-2 2.000000000000000000 2.00000000000000 4.0000000000
-3 3.140000000000000000 3.14000000000000 9.4200000000
-4 3.140000000000000000 3.14000000000000 3.1400000000
-10 10.000000000000000000 10.00000000000000 10.0000000000
-20 20.000000000000000000 20.00000000000000 20.0000000000
-100 100.000000000000000000 100.00000000000000 100.0000000000
-124 124.000000000000000000 124.00000000000000 124.0000000000
-125 125.200000000000000000 125.20000000000000 125.2000000000
-200 200.000000000000000000 200.00000000000000 200.0000000000
-4400 -4400.000000000000000000 -4400.00000000000000 -4400.0000000000
-1234567890 1234567890.123456780000000000 1234567890.12345678000000 1234567890.1234567800
+-1234567890 -1234567890.123456789000000000 -1234567890.1234567890000000000000000000 -1234567890.1234567890
+-1255 -1255.490000000000000000 -1255.4900000000000000000000000000 -1255.4900000000
+-11 -1.122000000000000000 -1.1220000000000000000000000000 -1.1220000000
+-1 -1.120000000000000000 -1.1200000000000000000000000000 -2.2400000000
+0 0.025384615384615385 0.0253846153846153846153846154 0.3300000000
+1 1.048400000000000000 1.0484000000000000000000000000 5.2420000000
+2 2.000000000000000000 2.0000000000000000000000000000 4.0000000000
+3 3.140000000000000000 3.1400000000000000000000000000 9.4200000000
+4 3.140000000000000000 3.1400000000000000000000000000 3.1400000000
+10 10.000000000000000000 10.0000000000000000000000000000 10.0000000000
+20 20.000000000000000000 20.0000000000000000000000000000 20.0000000000
+100 100.000000000000000000 100.0000000000000000000000000000 100.0000000000
+124 124.000000000000000000 124.0000000000000000000000000000 124.0000000000
+125 125.200000000000000000 125.2000000000000000000000000000 125.2000000000
+200 200.000000000000000000 200.0000000000000000000000000000 200.0000000000
+4400 -4400.000000000000000000 -4400.0000000000000000000000000000 -4400.0000000000
+1234567890 1234567890.123456780000000000 1234567890.1234567800000000000000000000 1234567890.1234567800
PREHOOK: query: EXPLAIN SELECT -key FROM DECIMAL_UDF
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN SELECT -key FROM DECIMAL_UDF
@@ -1849,35 +1849,39 @@ STAGE PLANS:
alias: decimal_udf
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: decimal(20,10)), value (type: int)
- outputColumnNames: key, value
+ expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev(key), variance(key)
- keys: value (type: int)
+ aggregations: sum(_col3), sum(_col2), count(_col1)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: stddev(VALUE._col0), variance(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1897,8 +1901,8 @@ POSTHOOK: Input: default@decimal_udf
-1255 0.0 0.0
-11 0.0 0.0
-1 0.0 0.0
-0 0.22561046704494161 0.050900082840236685
-1 0.05928102563215321 0.0035142400000000066
+0 0.22561046704494161 0.05090008284023669
+1 0.05928102563215448 0.003514240000000157
2 0.0 0.0
3 0.0 0.0
4 0.0 0.0
@@ -1926,35 +1930,39 @@ STAGE PLANS:
alias: decimal_udf
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: decimal(20,10)), value (type: int)
- outputColumnNames: key, value
+ expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp(key), var_samp(key)
- keys: value (type: int)
+ aggregations: sum(_col3), sum(_col2), count(_col1)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3590 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1974,8 +1982,8 @@ POSTHOOK: Input: default@decimal_udf
-1255 NULL NULL
-11 NULL NULL
-1 0.0 0.0
-0 0.2348228191855647 0.055141756410256405
-1 0.06627820154470102 0.004392800000000008
+0 0.23482281918556472 0.05514175641025642
+1 0.06627820154470243 0.0043928000000001965
2 0.0 0.0
3 0.0 0.0
4 NULL NULL
diff --git a/ql/src/test/results/clientpositive/fetch_aggregation.q.out b/ql/src/test/results/clientpositive/fetch_aggregation.q.out
index f20320fd9b..801c6de35d 100644
--- a/ql/src/test/results/clientpositive/fetch_aggregation.q.out
+++ b/ql/src/test/results/clientpositive/fetch_aggregation.q.out
@@ -16,14 +16,14 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(key), sum(key), avg(key), min(key), max(key), std(key), variance(key)
+ aggregations: count(_col0), sum(_col0), min(_col0), max(_col0), sum(_col2), sum(_col1)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
@@ -36,11 +36,15 @@ STAGE PLANS:
limit: -1
Processor Tree:
Group By Operator
- aggregations: count(_col0), sum(_col1), avg(_col2), min(_col3), max(_col4), std(_col5), variance(_col6)
+ aggregations: count(_col0), sum(_col1), min(_col2), max(_col3), sum(_col4), sum(_col5)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1 Data size: 800 Basic stats: COMPLETE Column stats: NONE
- ListSink
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: bigint), _col1 (type: double), (_col1 / _col0) (type: double), _col2 (type: string), _col3 (type: string), power(((_col4 - ((_col5 * _col5) / _col0)) / _col0), 0.5) (type: double), ((_col4 - ((_col5 * _col5) / _col0)) / _col0) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE
+ ListSink
PREHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key),variance(key) from src
PREHOOK: type: QUERY
@@ -50,4 +54,4 @@ POSTHOOK: query: select count(key),sum(key),avg(key),min(key),max(key),std(key),
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
#### A masked pattern was here ####
-500 130091.0 260.182 0 98 142.92680950752384 20428.072876000006
+500 130091.0 260.182 0 98 142.9268095075238 20428.072876000002
diff --git a/ql/src/test/results/clientpositive/groupby3.q.out b/ql/src/test/results/clientpositive/groupby3.q.out
index 7c97174830..0a566c7570 100644
--- a/ql/src/test/results/clientpositive/groupby3.q.out
+++ b/ql/src/test/results/clientpositive/groupby3.q.out
@@ -47,13 +47,13 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: value (type: string)
+ outputColumnNames: value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: substr(value, 5) (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Operator Tree:
Group By Operator
diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out
index edad22b93f..06c476b145 100644
--- a/ql/src/test/results/clientpositive/groupby3_map.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map.q.out
@@ -45,33 +45,33 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0)
+ aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+ expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -80,19 +80,19 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
- Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 4064 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 3984 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -146,7 +146,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
index 20344640e6..f94ef49c27 100644
--- a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
@@ -49,33 +49,33 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0), sum(DISTINCT _col0), count(DISTINCT _col0)
+ aggregations: sum(_col0), count(_col0), sum(DISTINCT _col0), count(DISTINCT _col0), max(_col0), min(_col0), sum(_col2), sum(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(VALUE._col4), min(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double)
+ expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -84,19 +84,19 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double)
outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
- Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 516 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll')
mode: complete
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4832 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4832 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 4840 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4832 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -153,10 +153,10 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ]
POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -171,4 +171,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876 20469.01089779559 79136.0 309.0
+130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0
diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
index e53e62c2ce..1ef5a45e88 100644
--- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out
@@ -46,12 +46,12 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: value (type: string)
+ outputColumnNames: value
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(_col0), avg(_col0), avg(DISTINCT _col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0)
- keys: _col0 (type: string)
+ aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5))
+ keys: substr(value, 5) (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
diff --git a/ql/src/test/results/clientpositive/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out
index 1aa4cb6ce6..83e107054d 100644
--- a/ql/src/test/results/clientpositive/groupby3_noskew.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out
@@ -45,26 +45,27 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: double), _col1 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0)
+ aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1)
mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+ expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -73,7 +74,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll')
mode: complete
@@ -135,7 +136,7 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -150,4 +151,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582
+130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593
diff --git a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
index bb964e66d6..6acc9a9016 100644
--- a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out
@@ -49,26 +49,27 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col2 (type: double), _col1 (type: double)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(KEY._col0:0._col0), avg(KEY._col0:0._col0), avg(DISTINCT KEY._col0:0._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), std(KEY._col0:0._col0), stddev_samp(KEY._col0:0._col0), variance(KEY._col0:0._col0), var_samp(KEY._col0:0._col0), sum(DISTINCT KEY._col0:1._col0), count(DISTINCT KEY._col0:2._col0)
+ aggregations: sum(KEY._col0:0._col0), count(KEY._col0:0._col0), sum(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0), max(KEY._col0:0._col0), min(KEY._col0:0._col0), sum(VALUE._col0), sum(VALUE._col1)
mode: complete
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), UDFToDouble(_col10) (type: double)
+ expressions: _col0 (type: double), (_col0 / _col1) (type: double), (_col2 / _col3) (type: double), UDFToDouble(_col4) (type: double), UDFToDouble(_col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / _col1), 0.5) (type: double), power(((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / _col1) (type: double), ((_col6 - ((_col7 * _col7) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END) (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -77,7 +78,7 @@ STAGE PLANS:
Select Operator
expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double)
outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11
- Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 416 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll'), compute_stats(c5, 'hll'), compute_stats(c6, 'hll'), compute_stats(c7, 'hll'), compute_stats(c8, 'hll'), compute_stats(c9, 'hll'), compute_stats(c10, 'hll'), compute_stats(c11, 'hll')
mode: complete
@@ -142,10 +143,10 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c10 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c11 EXPRESSION [(src)src.null, ]
POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
-POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
@@ -160,4 +161,4 @@ POSTHOOK: query: SELECT dest1.* FROM dest1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@dest1
#### A masked pattern was here ####
-130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07287599999 20469.010897795582 79136.0 309.0
+130091.0 260.182 256.10355987055016 98.0 0.0 142.9268095075238 143.06995106518906 20428.072876000002 20469.010897795593 79136.0 309.0
diff --git a/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out b/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
index e894205a27..5d27f4ca2c 100644
--- a/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
+++ b/ql/src/test/results/clientpositive/groupby_grouping_sets3.q.out
@@ -45,27 +45,27 @@ STAGE PLANS:
outputColumnNames: a, b, c
Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c), count()
+ aggregations: sum(c), count(c), count()
keys: a (type: string), b (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -105,27 +105,27 @@ STAGE PLANS:
outputColumnNames: a, b, c
Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c), count()
+ aggregations: sum(c), count(c), count()
keys: a (type: string), b (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -191,23 +191,23 @@ STAGE PLANS:
outputColumnNames: a, b, c
Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c), count()
+ aggregations: sum(c), count(c), count()
keys: a (type: string), b (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct), _col3 (type: bigint)
+ value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
keys: KEY._col0 (type: string), KEY._col1 (type: string), 0L (type: bigint)
mode: partials
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -225,17 +225,17 @@ STAGE PLANS:
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 4 Data size: 2880 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: final
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 2 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/having2.q.out b/ql/src/test/results/clientpositive/having2.q.out
index 12fae67586..281b81d2ff 100644
--- a/ql/src/test/results/clientpositive/having2.q.out
+++ b/ql/src/test/results/clientpositive/having2.q.out
@@ -311,10 +311,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: sum(_col2), avg(_col0), count(_col4)
+ aggregations: sum(_col2), sum(_col0), count(_col0), count(_col4)
keys: _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
@@ -332,16 +332,16 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((_col1 <= 4074689.000000041D) and (_col2 <= 822.0D) and (_col3 > 4L)) (type: boolean)
+ predicate: (((_col2 / _col3) <= 822.0D) and (_col1 <= 4074689.000000041D) and (_col4 > 4L)) (type: boolean)
Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col0 (type: string)
@@ -430,10 +430,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: sum(_col2), avg(_col0), count(_col4)
+ aggregations: sum(_col2), sum(_col0), count(_col0), count(_col4)
keys: _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
@@ -451,20 +451,20 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
- outputColumnNames: _col1, _col2, _col3, _col4
+ expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((_col2 <= 4074689.000000041D) and (_col3 <= 822.0D) and (_col4 > 4L)) (type: boolean)
+ predicate: (((_col3 / _col4) <= 822.0D) and (_col2 <= 4074689.000000041D) and (_col5 > 4L)) (type: boolean)
Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col1 (type: string)
@@ -553,10 +553,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
Group By Operator
- aggregations: sum(_col2), avg(_col0), count(_col4)
+ aggregations: sum(_col2), sum(_col0), count(_col0), count(_col4)
keys: _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
File Output Operator
compressed: false
@@ -574,20 +574,20 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 550 Data size: 5843 Basic stats: PARTIAL Column stats: NONE
- value expressions: _col1 (type: double), _col2 (type: struct), _col3 (type: bigint)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Select Operator
- expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
- outputColumnNames: _col1, _col2, _col3, _col4
+ expressions: _col0 (type: string), _col1 (type: double), _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
+ outputColumnNames: _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 275 Data size: 2921 Basic stats: PARTIAL Column stats: NONE
Filter Operator
- predicate: ((_col2 <= 4074689.000000041D) and (_col3 <= 822.0D) and (_col4 > 4L)) (type: boolean)
+ predicate: (((_col3 / _col4) <= 822.0D) and (_col2 <= 4074689.000000041D) and (_col5 > 4L)) (type: boolean)
Statistics: Num rows: 10 Data size: 106 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col1 (type: string)
diff --git a/ql/src/test/results/clientpositive/limit_pushdown2.q.out b/ql/src/test/results/clientpositive/limit_pushdown2.q.out
index 5aeb5213e5..bae6e248e7 100644
--- a/ql/src/test/results/clientpositive/limit_pushdown2.q.out
+++ b/ql/src/test/results/clientpositive/limit_pushdown2.q.out
@@ -24,10 +24,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -35,24 +35,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -118,10 +122,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -129,24 +133,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -212,10 +220,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col0 (type: string), _col1 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -223,24 +231,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -306,10 +318,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col1 (type: string), _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -317,24 +329,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -400,10 +416,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col1 (type: string), _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -411,24 +427,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -494,10 +514,10 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col1 (type: string), _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -505,24 +525,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: double), _col3 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -821,30 +845,34 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col1)
+ aggregations: sum(_col1), count(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
@@ -936,27 +964,27 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct)
+ value expressions: _col3 (type: double), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+ expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
File Output Operator
@@ -1025,27 +1053,27 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(_col2)
+ aggregations: sum(_col2), count(_col2)
keys: _col0 (type: string), _col1 (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct)
+ value expressions: _col3 (type: double), _col4 (type: bigint)
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3
+ outputColumnNames: _col0, _col1, _col3, _col4
Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col3 (type: double)
+ expressions: _col1 (type: string), _col0 (type: string), (_col3 / _col4) (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out
index 347ae3780d..df84bbfa39 100644
--- a/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out
+++ b/ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out
@@ -289,52 +289,56 @@ STAGE PLANS:
outputColumnNames: key
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: max(key), min(key), avg(key)
+ aggregations: max(key), min(key), sum(key), count(key)
keys: key (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col3, _col4
- Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
+ Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: struct)
+ Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string), _col3 (type: string), _col4 (type: double), _col5 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), min(VALUE._col1), avg(VALUE._col2)
+ aggregations: max(VALUE._col0), min(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 250 Data size: 177750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 117750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: max(_col1), count(_col0), min(_col2), avg(_col3)
+ aggregations: max(_col1), count(_col0), min(_col2), sum(_col3), count(_col4)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 632 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: struct)
+ Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), avg(VALUE._col3)
+ aggregations: max(VALUE._col0), count(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 392 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), (_col3 / _col4) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -576,51 +580,51 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: key (type: string)
- outputColumnNames: key
+ expressions: key (type: string), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: count(), stddev(key)
- keys: key (type: string)
+ aggregations: count(), sum(_col2), sum(_col1), count(_col0)
+ keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col3
- Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
+ Statistics: Num rows: 250 Data size: 29750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col3 (type: struct)
+ Statistics: Num rows: 250 Data size: 29750 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col3 (type: double), _col4 (type: double), _col5 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), stddev(VALUE._col1)
+ aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
keys: KEY._col0 (type: string)
mode: partial2
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 43750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 250 Data size: 29750 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: count(_col1), count(_col0), stddev(_col2)
+ aggregations: count(_col1), count(_col0), sum(_col2), sum(_col3), count(_col4)
mode: partial2
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: struct)
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: double), _col3 (type: double), _col4 (type: bigint)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), count(VALUE._col1), stddev(VALUE._col2)
+ aggregations: count(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(_col2) (type: int)
+ expressions: _col0 (type: bigint), _col1 (type: bigint), UDFToInteger(power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: int)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
@@ -696,51 +700,51 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col0), avg(_col0), max(_col0), min(_col0), std(_col0), stddev_samp(_col0), variance(_col0), var_samp(_col0)
+ aggregations: sum(_col0), count(_col0), max(_col0), min(_col0), sum(_col2), sum(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7
+ Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct)
+ Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), max(VALUE._col2), min(VALUE._col3), std(VALUE._col4), stddev_samp(VALUE._col5), variance(VALUE._col6), var_samp(VALUE._col7)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), max(VALUE._col2), min(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5)
keys: KEY._col0 (type: string)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 250 Data size: 284000 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col1), avg(_col2), count(_col0), max(_col3), min(_col4), std(_col5), stddev_samp(_col6), variance(_col7), var_samp(_col8)
+ aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6)
mode: partial2
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: double)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), avg(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2), max(VALUE._col3), min(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: double), _col1 (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(_col5) (type: int), UDFToInteger(_col6) (type: int), UDFToInteger(_col7) (type: int), UDFToInteger(_col8) (type: int)
+ expressions: _col0 (type: double), (_col0 / _col1) (type: double), _col2 (type: bigint), _col3 (type: string), _col4 (type: string), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5)) (type: int), UDFToInteger(power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / _col1)) (type: int), UDFToInteger(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END)) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 1 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
@@ -810,30 +814,30 @@ STAGE PLANS:
outputColumnNames: key, value
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: max(key), count(DISTINCT key), min(key), avg(key)
+ aggregations: max(key), count(DISTINCT key), min(key), sum(key), count(key)
keys: value (type: string), key (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 250 Data size: 202500 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 250 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 202500 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: struct)
+ Statistics: Num rows: 250 Data size: 142500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: string), _col4 (type: string), _col5 (type: double), _col6 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), avg(VALUE._col3)
+ aggregations: max(VALUE._col0), count(DISTINCT KEY._col1:0._col0), min(VALUE._col2), sum(VALUE._col3), count(VALUE._col4)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 250 Data size: 118750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 250 Data size: 120750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), _col4 (type: double)
+ expressions: _col1 (type: string), _col2 (type: bigint), _col3 (type: string), (_col4 / _col5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 98743eb9db..ce53955692 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -2144,42 +2144,42 @@ Stage-0
limit:-1
Stage-1
Reducer 3 llap
- File Output Operator [FS_21]
- Merge Join Operator [MERGEJOIN_26] (rows=6 width=227)
- Conds:RS_17._col1=RS_18._col0(Left Semi),Output:["_col0","_col1","_col2"]
+ File Output Operator [FS_22]
+ Merge Join Operator [MERGEJOIN_27] (rows=6 width=227)
+ Conds:RS_18._col1=RS_19._col0(Left Semi),Output:["_col0","_col1","_col2"]
<-Reducer 2 [SIMPLE_EDGE] llap
- SHUFFLE [RS_17]
+ SHUFFLE [RS_18]
PartitionCols:_col1
Select Operator [SEL_6] (rows=13 width=227)
Output:["_col0","_col1","_col2"]
- Group By Operator [GBY_5] (rows=13 width=227)
- Output:["_col0","_col1","_col2"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0, KEY._col1
+ Group By Operator [GBY_5] (rows=13 width=235)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"],keys:KEY._col0, KEY._col1
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_4]
PartitionCols:_col0, _col1
- Group By Operator [GBY_3] (rows=13 width=295)
- Output:["_col0","_col1","_col2"],aggregations:["avg(p_size)"],keys:p_name, p_mfgr
- Filter Operator [FIL_24] (rows=26 width=223)
+ Group By Operator [GBY_3] (rows=13 width=235)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(p_size)","count(p_size)"],keys:p_name, p_mfgr
+ Filter Operator [FIL_25] (rows=26 width=223)
predicate:p_name is not null
TableScan [TS_0] (rows=26 width=223)
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_name","p_mfgr","p_size"]
<-Reducer 5 [SIMPLE_EDGE] llap
- SHUFFLE [RS_18]
+ SHUFFLE [RS_19]
PartitionCols:_col0
- Group By Operator [GBY_16] (rows=13 width=184)
+ Group By Operator [GBY_17] (rows=13 width=184)
Output:["_col0"],keys:_col0
- Select Operator [SEL_11] (rows=26 width=184)
+ Select Operator [SEL_12] (rows=26 width=184)
Output:["_col0"]
- Filter Operator [FIL_25] (rows=26 width=491)
+ Filter Operator [FIL_26] (rows=26 width=491)
predicate:first_value_window_0 is not null
- PTF Operator [PTF_10] (rows=26 width=491)
+ PTF Operator [PTF_11] (rows=26 width=491)
Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
- Select Operator [SEL_9] (rows=26 width=491)
+ Select Operator [SEL_10] (rows=26 width=491)
Output:["_col1","_col2","_col5"]
<-Map 4 [SIMPLE_EDGE] llap
- SHUFFLE [RS_8]
+ SHUFFLE [RS_9]
PartitionCols:p_mfgr
- TableScan [TS_7] (rows=26 width=223)
+ TableScan [TS_8] (rows=26 width=223)
default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_size"]
PREHOOK: query: explain select *
@@ -2386,12 +2386,12 @@ Stage-0
PartitionCols:_col0
Select Operator [SEL_20] (rows=1 width=12)
Output:["_col0","_col1"]
- Group By Operator [GBY_7] (rows=1 width=8)
- Output:["_col0"],aggregations:["avg(VALUE._col0)"]
+ Group By Operator [GBY_7] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","count(VALUE._col1)"]
<-Map 5 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_6]
- Group By Operator [GBY_5] (rows=1 width=76)
- Output:["_col0"],aggregations:["avg(p_size)"]
+ Group By Operator [GBY_5] (rows=1 width=16)
+ Output:["_col0","_col1"],aggregations:["sum(p_size)","count(p_size)"]
Filter Operator [FIL_33] (rows=8 width=4)
predicate:(p_size < 10)
TableScan [TS_2] (rows=26 width=4)
@@ -2405,7 +2405,9 @@ Stage-0
SHUFFLE [RS_22]
Group By Operator [GBY_12] (rows=1 width=16)
Output:["_col0","_col1"],aggregations:["count()","count(_col0)"]
- Please refer to the previous Group By Operator [GBY_7]
+ Select Operator [SEL_8] (rows=1 width=16)
+ Output:["_col0"]
+ Please refer to the previous Group By Operator [GBY_7]
<-Map 1 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_21]
Select Operator [SEL_1] (rows=26 width=125)
diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out
index d050c4ec69..05b5bfddc7 100644
--- a/ql/src/test/results/clientpositive/llap/groupby3.q.out
+++ b/ql/src/test/results/clientpositive/llap/groupby3.q.out
@@ -54,13 +54,13 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: substr(value, 5) (type: string)
- outputColumnNames: _col0
+ expressions: value (type: string)
+ outputColumnNames: value
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
- key expressions: _col0 (type: string)
+ key expressions: substr(value, 5) (type: string)
sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Map-reduce partition columns: substr(value, 5) (type: string)
Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
index fe6b4f96e0..d1ac4e9b83 100644
--- a/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/llap/limit_pushdown.q.out
@@ -296,39 +296,43 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col1)
+ aggregations: sum(_col1), count(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 20
- Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
index 97686cb5d1..960414ba37 100644
--- a/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
+++ b/ql/src/test/results/clientpositive/llap/limit_pushdown3.q.out
@@ -311,35 +311,39 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col1)
+ aggregations: sum(_col1), count(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.3
- value expressions: _col1 (type: double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.3
+ value expressions: _col1 (type: double)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
index cce6bc3c0a..09a120ae12 100644
--- a/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
+++ b/ql/src/test/results/clientpositive/llap/offset_limit_ppd_optimizer.q.out
@@ -299,40 +299,44 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col1)
+ aggregations: sum(_col1), count(_col1)
keys: _col0 (type: string)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 42750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.3
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
- Limit
- Number of rows: 20
- Offset of rows: 10
- Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 26750 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 24750 Basic stats: COMPLETE Column stats: COMPLETE
+ Limit
+ Number of rows: 20
+ Offset of rows: 10
Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 20 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
index 1ccdff8aa2..c786ba31c7 100644
--- a/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/parquet_types_vectorization.q.out
@@ -191,34 +191,34 @@ STAGE PLANS:
alias: parquet_types
Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
- outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
+ expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
- keys: ctinyint (type: tinyint)
+ aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
+ keys: _col0 (type: tinyint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
Statistics: Num rows: 22 Data size: 4576 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
+ value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs (cache only)
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+ aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), sum(VALUE._col3), count(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), count(VALUE._col7)
keys: KEY._col0 (type: tinyint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double)
+ expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round((_col4 / _col5), 5) (type: double), round(power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5), 5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 11 Data size: 2288 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
index 277036a8e5..37b18f01e6 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out
@@ -360,30 +360,34 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col0)
+ aggregations: sum(_col0), count(_col0)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reducer 5
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: double)
- sort order: +
- Map-reduce partition columns: _col0 (type: double)
+ predicate: (_col0 is not null and _col1 is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: double)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -1268,17 +1272,17 @@ STAGE PLANS:
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(l_quantity)
+ aggregations: sum(l_quantity), count(l_quantity)
keys: l_partkey (type: int)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct)
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -1337,16 +1341,16 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (_col1 is not null and _col2 is not null) (type: boolean)
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col1 (type: double), _col0 (type: int)
+ expressions: (_col1 / _col2) (type: double), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
@@ -4463,17 +4467,17 @@ STAGE PLANS:
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
keys: p_partkey (type: int)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct)
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -4498,24 +4502,32 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: _col1 is not null (type: boolean)
- Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- keys: _col0 (type: int), _col1 (type: double)
- mode: hash
+ predicate: (_col1 is not null and _col2 is not null) (type: boolean)
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col1 / _col2) (type: double), _col0 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int), _col1 (type: double)
- sort order: ++
- Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
- Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: int), _col0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: double)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: double)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: double)
+ Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
Stage: Stage-0
Fetch Operator
@@ -5051,23 +5063,23 @@ STAGE PLANS:
predicate: p_partkey is not null (type: boolean)
Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
keys: p_partkey (type: int)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct)
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 13 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct)
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -5160,34 +5172,38 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(), count(_col1)
- keys: _col0 (type: int)
- mode: complete
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Group By Operator
+ aggregations: count(), count(_col1)
+ keys: _col0 (type: int)
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 9
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 13 Data size: 156 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 13 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col1 (type: double), _col0 (type: int)
+ expressions: (_col1 / _col2) (type: double), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 13 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
index 390caf0a01..79857ab365 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out
@@ -279,17 +279,17 @@ STAGE PLANS:
predicate: p_mfgr is not null (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
keys: p_mfgr (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Group By Operator
aggregations: max(p_size), min(p_size)
keys: p_mfgr (type: string)
@@ -308,17 +308,21 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -410,17 +414,17 @@ STAGE PLANS:
predicate: p_mfgr is not null (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
keys: p_mfgr (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Group By Operator
aggregations: max(p_size), min(p_size)
keys: p_mfgr (type: string)
@@ -439,17 +443,21 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: string)
- sort order: +
- Map-reduce partition columns: _col0 (type: string)
+ Select Operator
+ expressions: _col0 (type: string), (_col1 / _col2) (type: double)
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: double)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double)
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -1432,17 +1440,17 @@ STAGE PLANS:
predicate: p_name is not null (type: boolean)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
keys: p_name (type: string), p_mfgr (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct)
+ value expressions: _col2 (type: bigint), _col3 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Map 3
@@ -1462,13 +1470,13 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string), KEY._col1 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: string), _col0 (type: string), _col2 (type: double)
+ expressions: _col1 (type: string), _col0 (type: string), (_col2 / _col3) (type: double)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
index 438e44470a..24f18e9672 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out
@@ -2888,35 +2888,39 @@ STAGE PLANS:
outputColumnNames: l_quantity
Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(l_quantity)
+ aggregations: sum(l_quantity), count(l_quantity)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 10
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(), count(_col0)
- mode: complete
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Group By Operator
+ aggregations: count(), count(_col0)
+ mode: complete
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Select Operator
- expressions: _col0 (type: double), true (type: boolean)
+ expressions: (_col0 / _col1) (type: double), true (type: boolean)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index 851a783fd6..7b8e87aa22 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -715,14 +715,14 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col0)
+ aggregations: sum(_col0), count(_col0)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
PTF Operator
Function definitions:
Input definition
@@ -752,41 +752,45 @@ STAGE PLANS:
outputColumnNames: _col0
Statistics: Num rows: 8 Data size: 2960 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col0)
+ aggregations: sum(_col0), count(_col0)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reducer 6
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: count(), count(_col0)
- mode: complete
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ Group By Operator
+ aggregations: count(), count(_col0)
+ mode: complete
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: bigint), _col1 (type: bigint)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Reducer 7
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: double), true (type: boolean)
+ expressions: (_col0 / _col1) (type: double), true (type: boolean)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
index 1483651f62..4a8c9b1574 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out
@@ -122,14 +122,14 @@ STAGE PLANS:
outputColumnNames: p_size
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -143,14 +143,14 @@ STAGE PLANS:
1
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
residual filter predicates: {(UDFToDouble(_col5) > _col9)}
- Statistics: Num rows: 8 Data size: 5600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 8 Data size: 5600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 8 Data size: 5600 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 5120 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -159,14 +159,18 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double)
Stage: Stage-0
Fetch Operator
@@ -826,14 +830,14 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int)
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -892,14 +896,18 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: double)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double)
Stage: Stage-0
Fetch Operator
@@ -1595,17 +1603,17 @@ STAGE PLANS:
predicate: p_type is not null (type: boolean)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
keys: p_type (type: string)
mode: hash
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -1635,13 +1643,13 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: string)
mode: mergepartial
- outputColumnNames: _col0, _col1
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col1 (type: double), true (type: boolean), _col0 (type: string)
+ expressions: (_col1 / _col2) (type: double), true (type: boolean), _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
@@ -4177,17 +4185,17 @@ STAGE PLANS:
predicate: l_partkey is not null (type: boolean)
Statistics: Num rows: 100 Data size: 1200 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(l_quantity)
+ aggregations: sum(l_quantity), count(l_quantity)
keys: l_partkey (type: int)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 50 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct)
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -4236,13 +4244,13 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 50 Data size: 1000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col1 (type: double), _col0 (type: int)
+ expressions: (_col1 / _col2) (type: double), _col0 (type: int)
outputColumnNames: _col0, _col1
Statistics: Num rows: 50 Data size: 600 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
@@ -6328,29 +6336,29 @@ STAGE PLANS:
outputColumnNames: _col0, _col2
Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(_col0)
+ aggregations: sum(_col0), count(_col0)
keys: _col2 (type: int)
mode: hash
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint), _col2 (type: bigint)
Reducer 4
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
- predicate: (_col1 = 0.0D) (type: boolean)
- Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ predicate: (0.0D = (_col1 / _col2)) (type: boolean)
+ Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int)
outputColumnNames: _col0
diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
index abbfffd9be..8ad070508e 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
@@ -3815,14 +3815,14 @@ STAGE PLANS:
predicate: p_partkey BETWEEN 1 AND 20 (type: boolean)
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(p_partkey)
+ aggregations: sum(p_partkey), count(p_partkey)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Map 9
@@ -3928,14 +3928,18 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: double)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double)
Stage: Stage-0
Fetch Operator
@@ -4175,14 +4179,14 @@ STAGE PLANS:
outputColumnNames: p_size
Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: llap
LLAP IO: no inputs
Map 6
@@ -4254,14 +4258,18 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: double)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double)
Reducer 7
Execution mode: llap
Reduce Operator Tree:
@@ -4386,14 +4394,14 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int)
Group By Operator
- aggregations: avg(p_size)
+ aggregations: sum(p_size), count(p_size)
mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct)
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint), _col1 (type: bigint)
Group By Operator
aggregations: sum(p_size)
mode: hash
@@ -4512,14 +4520,18 @@ STAGE PLANS:
Execution mode: llap
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: double)
+ outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: double)
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: double)
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out b/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
index 5d4bfe7fa4..1e090f0c1c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out
@@ -140,9 +140,9 @@ STAGE PLANS:
projectedOutputColumnNums: [6]
Statistics: Num rows: 2000 Data size: 212912 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(dc), max(dc), sum(dc), avg(dc)
+ aggregations: min(dc), max(dc), sum(dc), count(dc)
Group By Vectorization:
- aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimal(col 6:decimal(38,18)) -> struct
+ aggregators: VectorUDAFMinDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 6:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 6:decimal(38,18)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -150,7 +150,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -159,8 +159,8 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct)
+ Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -190,14 +190,14 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,18), VALUE._col2:decimal(38,18), VALUE._col3:struct
+ dataColumns: VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,18), VALUE._col2:decimal(38,18), VALUE._col3:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
Group By Vectorization:
- aggregators: VectorUDAFMinDecimal(col 0:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 1:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 2:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(38,18)
+ aggregators: VectorUDAFMinDecimal(col 0:decimal(38,18)) -> decimal(38,18), VectorUDAFMaxDecimal(col 1:decimal(38,18)) -> decimal(38,18), VectorUDAFSumDecimal(col 2:decimal(38,18)) -> decimal(38,18), VectorUDAFCountMerge(col 3:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
@@ -205,17 +205,26 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), (_col2 / _col3) (type: decimal(38,18))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 5]
+ selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(38,18), col 4:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,18)
+ Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -271,9 +280,9 @@ STAGE PLANS:
projectedOutputColumnNums: [5]
Statistics: Num rows: 2000 Data size: 15208 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(d), max(d), sum(d), avg(d)
+ aggregations: min(d), max(d), sum(d), count(d)
Group By Vectorization:
- aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFAvgDouble(col 5:double) -> struct
+ aggregators: VectorUDAFMinDouble(col 5:double) -> double, VectorUDAFMaxDouble(col 5:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 5:double) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -281,7 +290,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -290,8 +299,8 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct)
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -321,14 +330,14 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:double, VALUE._col3:struct
+ dataColumns: VALUE._col0:double, VALUE._col1:double, VALUE._col2:double, VALUE._col3:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
Group By Vectorization:
- aggregators: VectorUDAFMinDouble(col 0:double) -> double, VectorUDAFMaxDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFAvgFinal(col 3:struct) -> double
+ aggregators: VectorUDAFMinDouble(col 0:double) -> double, VectorUDAFMaxDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
@@ -336,17 +345,26 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 4]
+ selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 4:double
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -402,9 +420,9 @@ STAGE PLANS:
projectedOutputColumnNums: [10]
Statistics: Num rows: 2000 Data size: 76040 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: min(ts), max(ts), sum(ts), avg(ts)
+ aggregations: min(ts), max(ts), sum(ts), count(ts)
Group By Vectorization:
- aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFAvgTimestamp(col 10:timestamp) -> struct
+ aggregators: VectorUDAFMinTimestamp(col 10:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 10:timestamp) -> timestamp, VectorUDAFSumTimestamp(col 10:timestamp) -> double, VectorUDAFCount(col 10:timestamp) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -412,7 +430,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -421,8 +439,8 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1, 2, 3]
- Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct)
+ Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -452,14 +470,14 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: VALUE._col0:timestamp, VALUE._col1:timestamp, VALUE._col2:double, VALUE._col3:struct
+ dataColumns: VALUE._col0:timestamp, VALUE._col1:timestamp, VALUE._col2:double, VALUE._col3:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), avg(VALUE._col3)
+ aggregations: min(VALUE._col0), max(VALUE._col1), sum(VALUE._col2), count(VALUE._col3)
Group By Vectorization:
- aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFAvgFinal(col 3:struct) -> double
+ aggregators: VectorUDAFMinTimestamp(col 0:timestamp) -> timestamp, VectorUDAFMaxTimestamp(col 1:timestamp) -> timestamp, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
@@ -467,17 +485,26 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), (_col2 / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 4]
+ selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 4:double
+ Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
index 4cb6213b89..861ae9ab14 100644
--- a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out
@@ -142,19 +142,19 @@ STAGE PLANS:
projectedOutputColumnNums: [2]
Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(50), avg(50.0D), avg(50)
+ aggregations: sum(50), count(), sum(50.0D), count(50.0D), sum(50), count(50)
Group By Vectorization:
- aggregators: VectorUDAFAvgLong(ConstantVectorExpression(val 50) -> 12:int) -> struct, VectorUDAFAvgDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> struct, VectorUDAFAvgDecimal(ConstantVectorExpression(val 50) -> 14:decimal(10,0)) -> struct
+ aggregators: VectorUDAFSumLong(ConstantVectorExpression(val 50) -> 12:int) -> bigint, VectorUDAFCountStar(*) -> bigint, VectorUDAFSumDouble(ConstantVectorExpression(val 50.0) -> 13:double) -> double, VectorUDAFCount(ConstantVectorExpression(val 50.0) -> 14:double) -> bigint, VectorUDAFSumDecimal(ConstantVectorExpression(val 50) -> 15:decimal(10,0)) -> decimal(20,0), VectorUDAFCount(ConstantVectorExpression(val 50) -> 16:decimal(10,0)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 2:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 256 Data size: 114688 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 256 Data size: 39936 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -163,9 +163,9 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 256 Data size: 114688 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 256 Data size: 39936 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(12,0)), _col6 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -187,29 +187,38 @@ STAGE PLANS:
vectorized: true
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), count(VALUE._col5)
Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 1:struct) -> double, VectorUDAFAvgFinal(col 2:struct) -> double, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,4)
+ aggregators: VectorUDAFSumLong(col 1:bigint) -> bigint, VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFSumDecimal(col 5:decimal(12,0)) -> decimal(12,0), VectorUDAFCountMerge(col 6:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:int
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Reduce Sink Vectorization:
- className: VectorReduceSinkObjectHashOperator
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 256 Data size: 39936 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), (_col1 / _col2) (type: double), (_col3 / _col4) (type: double), CAST( (_col5 / _col6) AS decimal(6,4)) (type: decimal(6,4))
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Vectorization:
+ className: VectorSelectOperator
native: true
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ projectedOutputColumnNums: [0, 7, 8, 11]
+ selectExpressions: LongColDivideLongColumn(col 1:bigint, col 2:bigint) -> 7:double, DoubleColDivideLongColumn(col 3:double, col 4:bigint) -> 8:double, CastDecimalToDecimal(col 10:decimal(32,20))(children: DecimalColDivideDecimalColumn(col 5:decimal(12,0), col 9:decimal(19,0))(children: CastLongToDecimal(col 6:bigint) -> 9:decimal(19,0)) -> 10:decimal(32,20)) -> 11:decimal(6,4)
Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
- value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4))
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 256 Data size: 33792 Basic stats: COMPLETE Column stats: COMPLETE
+ TopN Hash Memory Usage: 0.1
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(6,4))
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -220,7 +229,7 @@ STAGE PLANS:
vectorized: true
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(14,4))
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: decimal(6,4))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
index 4bb8a01059..902d137b9c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out
@@ -252,26 +252,27 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(20,10), 2:cdecimal2:decimal(23,14), 3:cint:int, 4:ROW__ID:struct]
Select Operator
- expressions: cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), cint (type: int)
- outputColumnNames: cdecimal1, cdecimal2, cint
+ expressions: cint (type: int), cdecimal1 (type: decimal(20,10)), cdecimal2 (type: decimal(23,14)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 2, 3]
+ projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10]
+ selectExpressions: CastDecimalToDouble(col 1:decimal(20,10)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(20,10)) -> 6:double, CastDecimalToDouble(col 1:decimal(20,10)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(23,14)) -> 7:double, CastDecimalToDouble(col 2:decimal(23,14)) -> 9:double) -> 10:double
Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+ aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count()
Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFAvgDecimal(col 1:decimal(20,10)) -> struct, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(20,10)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFAvgDecimal(col 2:decimal(23,14)) -> struct, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(23,14)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFCount(col 1:decimal(20,10)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 1:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 1:decimal(20,10)) -> decimal(30,10), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(23,14)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 2:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 2:decimal(23,14)) -> decimal(33,14), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 3:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
- keys: cint (type: int)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -282,9 +283,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+ valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint)
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -301,30 +302,58 @@ STAGE PLANS:
includeColumns: [1, 2, 3]
dataColumns: cdouble:double, cdecimal1:decimal(20,10), cdecimal2:decimal(23,14), cint:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double, double, double]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 14
+ dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(20,10), VALUE._col2:decimal(20,10), VALUE._col3:decimal(30,10), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(23,14), VALUE._col8:decimal(23,14), VALUE._col9:decimal(33,14), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+ aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(20,10)) -> decimal(20,10), VectorUDAFMinDecimal(col 3:decimal(20,10)) -> decimal(20,10), VectorUDAFSumDecimal(col 4:decimal(30,10)) -> decimal(30,10), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(23,14)) -> decimal(23,14), VectorUDAFMinDecimal(col 9:decimal(23,14)) -> decimal(23,14), VectorUDAFSumDecimal(col 10:decimal(33,14)) -> decimal(33,14), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col15 > 1L) (type: boolean)
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 13:bigint, val 1)
+ predicate: (_col13 > 1L) (type: boolean)
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: decimal(24,14)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: decimal(27,18)), _col13 (type: double), _col14 (type: double)
+ expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), (CAST( _col4 AS decimal(24,14)) / _col1) (type: decimal(38,28)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(23,14)), _col9 (type: decimal(23,14)), _col10 (type: decimal(33,14)), (CAST( _col10 AS decimal(27,18)) / _col7) (type: decimal(38,29)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 16, 17, 18, 7, 8, 9, 10, 24, 19, 25]
+ selectExpressions: DecimalColDivideDecimalColumn(col 14:decimal(24,14), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 4:decimal(30,10)) -> 14:decimal(24,14), CastLongToDecimal(col 1:bigint) -> 15:decimal(19,0)) -> 16:decimal(38,28), FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 19:double) -> 18:double, IfExprNullCondExpr(col 20:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 19:double) -> 18:double, DecimalColDivideDecimalColumn(col 23:decimal(27,18), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 10:decimal(33,14)) -> 23:decimal(27,18), CastLongToDecimal(col 7:bigint) -> 15:decimal(19,0)) -> 24:decimal(38,29), FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 19:double) -> 25:double) -> 19:double) -> 25:double) -> 19:double, FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 25:double) -> 26:double) -> 25:double, IfExprNullCondExpr(col 22:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 25:double
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -355,14 +384,14 @@ POSTHOOK: query: SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby
#### A masked pattern was here ####
--3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.67352472963333 2174330.2092403853 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.811207307641183333 2604201.2704476737 2852759.5602156054
--563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.63641486490000 1426.0153418918999 2016.6902366556308 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.503273076922950000 1707.9424961538462 2415.395441814127
-253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.33992366976309 5708.9563478862 5711.745967572779 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.428359675480791885 6837.632716002934 6840.973851172274
-528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.41099682432305 257528.92988206653 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.965624807691689482 308443.1074570801 308593.82484083984
-626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.29399661106318 5742.09145323734 5744.897264034267 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.329148046874977988 6877.318722794877 6880.679250101603
-6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.67570081066667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.696514615282066667 3292794.4113115156 4032833.0678006653
-762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.74432689170000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.078394999846250000 3491310.1327026924 4937458.140118758
-NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.60810810806667 5695.483082135364 5696.4103077145055 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.576923076922966667 6821.495748565159 6822.606289190924
+-3728 6 5831542.2692483780 -3367.6517567568 5817556.0411483778 969592.6735247296333333333333333333 2174330.209240386 2381859.406131774 6 6984454.21109769200000 -4033.44576923076900 6967702.86724384584710 1161283.81120730764118333333333333333 2604201.2704476737 2852759.5602156054
+-563 2 -515.6210729730 -3367.6517567568 -3883.2728297298 -1941.6364148649000000000000000000 1426.0153418918997 2016.6902366556305 2 -617.56077692307690 -4033.44576923076900 -4651.00654615384590 -2325.50327307692295000000000000000 1707.9424961538462 2415.395441814127
+253665376 1024 9767.0054054054 -9779.5486486487 -347484.0818378374 -339.3399236697630859375000000000 5708.956347886203 5711.745967572781 1024 11697.96923076923100 -11712.99230769231000 -416182.64030769233089 -406.42835967548079188476562500000 6837.632716002931 6840.973851172272
+528534767 1024 5831542.2692483780 -9777.1594594595 11646372.8607481068 11373.4109968243230468750000000000 257528.9298820665 257654.7686043977 1024 6984454.21109769200000 -11710.13076923077100 13948892.79980307629003 13621.96562480769168948242187500000 308443.1074570797 308593.82484083937
+626923679 1024 9723.4027027027 -9778.9513513514 10541.0525297287 10.2939966110631835937500000000 5742.091453237337 5744.897264034264 1024 11645.74615384615400 -11712.27692307692300 12625.04759999997746 12.32914804687497798828125000000 6877.318722794881 6880.679250101608
+6981 3 5831542.2692483780 -515.6210729730 5830511.0271024320 1943503.6757008106666666666666666667 2749258.455012492 3367140.1929065133 3 6984454.21109769200000 -617.56077692307690 6983219.08954384584620 2327739.69651461528206666666666666667 3292794.4113115156 4032833.0678006653
+762 2 5831542.2692483780 1531.2194054054 5833073.4886537834 2916536.7443268917000000000000000000 2915005.5249214866 4122440.3477364695 2 6984454.21109769200000 1833.94569230769250 6986288.15678999969250 3493144.07839499984625000000000000000 3491310.1327026924 4937458.140118757
+NULL 3072 9318.4351351351 -4298.1513513514 5018444.1081079808 1633.6081081080666666666666666667 5695.483082135323 5696.410307714464 3072 11160.71538461538500 -5147.90769230769300 6010604.30769230735360 1956.57692307692296666666666666667 6821.495748565151 6822.606289190915
PREHOOK: query: CREATE TABLE decimal_vgby_small STORED AS TEXTFILE AS
SELECT cdouble, CAST (((cdouble*22.1)/37) AS DECIMAL(11,5)) AS cdecimal1,
CAST (((cdouble*9.3)/13) AS DECIMAL(16,0)) AS cdecimal2,
@@ -637,26 +666,27 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:cdouble:double, 1:cdecimal1:decimal(11,5), 2:cdecimal2:decimal(16,0), 3:cint:int, 4:ROW__ID:struct]
Select Operator
- expressions: cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), cint (type: int)
- outputColumnNames: cdecimal1, cdecimal2, cint
+ expressions: cint (type: int), cdecimal1 (type: decimal(11,5)), cdecimal2 (type: decimal(16,0)), UDFToDouble(cdecimal1) (type: double), (UDFToDouble(cdecimal1) * UDFToDouble(cdecimal1)) (type: double), UDFToDouble(cdecimal2) (type: double), (UDFToDouble(cdecimal2) * UDFToDouble(cdecimal2)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [1, 2, 3]
+ projectedOutputColumnNums: [3, 1, 2, 5, 8, 6, 10]
+ selectExpressions: CastDecimalToDouble(col 1:decimal(11,5)) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastDecimalToDouble(col 1:decimal(11,5)) -> 6:double, CastDecimalToDouble(col 1:decimal(11,5)) -> 7:double) -> 8:double, CastDecimalToDouble(col 2:decimal(16,0)) -> 6:double, DoubleColMultiplyDoubleColumn(col 7:double, col 9:double)(children: CastDecimalToDouble(col 2:decimal(16,0)) -> 7:double, CastDecimalToDouble(col 2:decimal(16,0)) -> 9:double) -> 10:double
Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: count(cdecimal1), max(cdecimal1), min(cdecimal1), sum(cdecimal1), avg(cdecimal1), stddev_pop(cdecimal1), stddev_samp(cdecimal1), count(cdecimal2), max(cdecimal2), min(cdecimal2), sum(cdecimal2), avg(cdecimal2), stddev_pop(cdecimal2), stddev_samp(cdecimal2), count()
+ aggregations: count(_col1), max(_col1), min(_col1), sum(_col1), sum(_col4), sum(_col3), count(_col2), max(_col2), min(_col2), sum(_col2), sum(_col6), sum(_col5), count()
Group By Vectorization:
- aggregators: VectorUDAFCount(col 1:decimal(11,5)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 1:decimal(11,5)) -> decimal(21,5), VectorUDAFAvgDecimal(col 1:decimal(11,5)) -> struct, VectorUDAFVarDecimal(col 1:decimal(11,5)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 1:decimal(11,5)) -> struct aggregation: stddev_samp, VectorUDAFCount(col 2:decimal(16,0)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 2:decimal(16,0)) -> decimal(26,0), VectorUDAFAvgDecimal(col 2:decimal(16,0)) -> struct, VectorUDAFVarDecimal(col 2:decimal(16,0)) -> struct aggregation: stddev_pop, VectorUDAFVarDecimal(col 2:decimal(16,0)) -> struct aggregation: stddev_samp, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFCount(col 1:decimal(11,5)) -> bigint, VectorUDAFMaxDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 1:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 1:decimal(11,5)) -> decimal(21,5), VectorUDAFSumDouble(col 8:double) -> double, VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFCount(col 2:decimal(16,0)) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 2:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 2:decimal(16,0)) -> decimal(26,0), VectorUDAFSumDouble(col 10:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 3:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
- keys: cint (type: int)
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -667,9 +697,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+ valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint)
+ value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), _col11 (type: double), _col12 (type: double), _col13 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -687,30 +717,58 @@ STAGE PLANS:
includeColumns: [1, 2, 3]
dataColumns: cdouble:double, cdecimal1:decimal(11,5), cdecimal2:decimal(16,0), cint:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double, double, double]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_pop" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 14
+ dataColumns: KEY._col0:int, VALUE._col0:bigint, VALUE._col1:decimal(11,5), VALUE._col2:decimal(11,5), VALUE._col3:decimal(21,5), VALUE._col4:double, VALUE._col5:double, VALUE._col6:bigint, VALUE._col7:decimal(16,0), VALUE._col8:decimal(16,0), VALUE._col9:decimal(26,0), VALUE._col10:double, VALUE._col11:double, VALUE._col12:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), avg(VALUE._col4), stddev_pop(VALUE._col5), stddev_samp(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9), sum(VALUE._col10), avg(VALUE._col11), stddev_pop(VALUE._col12), stddev_samp(VALUE._col13), count(VALUE._col14)
+ aggregations: count(VALUE._col0), max(VALUE._col1), min(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), count(VALUE._col6), max(VALUE._col7), min(VALUE._col8), sum(VALUE._col9), sum(VALUE._col10), sum(VALUE._col11), count(VALUE._col12)
+ Group By Vectorization:
+ aggregators: VectorUDAFCountMerge(col 1:bigint) -> bigint, VectorUDAFMaxDecimal(col 2:decimal(11,5)) -> decimal(11,5), VectorUDAFMinDecimal(col 3:decimal(11,5)) -> decimal(11,5), VectorUDAFSumDecimal(col 4:decimal(21,5)) -> decimal(21,5), VectorUDAFSumDouble(col 5:double) -> double, VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFCountMerge(col 7:bigint) -> bigint, VectorUDAFMaxDecimal(col 8:decimal(16,0)) -> decimal(16,0), VectorUDAFMinDecimal(col 9:decimal(16,0)) -> decimal(16,0), VectorUDAFSumDecimal(col 10:decimal(26,0)) -> decimal(26,0), VectorUDAFSumDouble(col 11:double) -> double, VectorUDAFSumDouble(col 12:double) -> double, VectorUDAFCountMerge(col 13:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
Statistics: Num rows: 6144 Data size: 1330955 Basic stats: COMPLETE Column stats: NONE
Filter Operator
- predicate: (_col15 > 1L) (type: boolean)
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColGreaterLongScalar(col 13:bigint, val 1)
+ predicate: (_col13 > 1L) (type: boolean)
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: decimal(15,9)), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: decimal(20,4)), _col13 (type: double), _col14 (type: double)
+ expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), (CAST( _col4 AS decimal(15,9)) / _col1) (type: decimal(35,29)), power(((_col5 - ((_col6 * _col6) / _col1)) / _col1), 0.5) (type: double), power(((_col5 - ((_col6 * _col6) / _col1)) / CASE WHEN ((_col1 = 1L)) THEN (null) ELSE ((_col1 - 1)) END), 0.5) (type: double), _col7 (type: bigint), _col8 (type: decimal(16,0)), _col9 (type: decimal(16,0)), _col10 (type: decimal(26,0)), CAST( (CAST( _col10 AS decimal(20,4)) / _col7) AS decimal(20,4)) (type: decimal(20,4)), power(((_col11 - ((_col12 * _col12) / _col7)) / _col7), 0.5) (type: double), power(((_col11 - ((_col12 * _col12) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2, 3, 4, 16, 17, 18, 7, 8, 9, 10, 23, 19, 25]
+ selectExpressions: DecimalColDivideDecimalColumn(col 14:decimal(15,9), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 4:decimal(21,5)) -> 14:decimal(15,9), CastLongToDecimal(col 1:bigint) -> 15:decimal(19,0)) -> 16:decimal(35,29), FuncPowerDoubleToDouble(col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 18:double)(children: DoubleColDivideLongColumn(col 17:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 17:double) -> 18:double) -> 17:double) -> 18:double) -> 17:double, FuncPowerDoubleToDouble(col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 22:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 1:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 18:double) -> 19:double) -> 18:double, IfExprNullCondExpr(col 20:boolean, null, col 21:bigint)(children: LongColEqualLongScalar(col 1:bigint, val 1) -> 20:boolean, LongColSubtractLongScalar(col 1:bigint, val 1) -> 21:bigint) -> 22:bigint) -> 19:double) -> 18:double, CastDecimalToDecimal(col 24:decimal(38,22))(children: DecimalColDivideDecimalColumn(col 23:decimal(20,4), col 15:decimal(19,0))(children: CastDecimalToDecimal(col 10:decimal(26,0)) -> 23:decimal(20,4), CastLongToDecimal(col 7:bigint) -> 15:decimal(19,0)) -> 24:decimal(38,22)) -> 23:decimal(20,4), FuncPowerDoubleToDouble(col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 25:double)(children: DoubleColDivideLongColumn(col 19:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 19:double) -> 25:double) -> 19:double) -> 25:double) -> 19:double, FuncPowerDoubleToDouble(col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 28:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 25:double) -> 26:double) -> 25:double, IfExprNullCondExpr(col 22:boolean, null, col 27:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 22:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 27:bigint) -> 28:bigint) -> 26:double) -> 25:double
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
Statistics: Num rows: 2048 Data size: 443651 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -741,14 +799,14 @@ POSTHOOK: query: SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby_small
#### A masked pattern was here ####
--3728 5 -515.62107 -3367.65176 -13986.22811 -2797.245622000 1140.812276 1275.466899351126 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621
--563 2 -515.62107 -3367.65176 -3883.27283 -1941.636415000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596
-253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.339923750 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613
-528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.351935137 5555.7621107931345 5558.482190324908 1024 6984454 -11710 13948890 13621.9629 308443.09823296947 308593.8156122219
-626923679 1024 9723.40270 -9778.95135 10541.05247 10.293996553 5742.091453325366 5744.897264122336 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
-6981 2 -515.62107 -515.62107 -1031.24214 -515.621070000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
-762 1 1531.21941 1531.21941 1531.21941 1531.219410000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
-NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.608110000 5695.483083909642 5696.410309489072 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
+-3728 5 -515.62107 -3367.65176 -13986.22811 -2797.24562200000000000000000000000 1140.8122759999992 1275.466899351125 6 6984454 -4033 6967704 1161284.0000 2604201.0914565204 2852759.364140621
+-563 2 -515.62107 -3367.65176 -3883.27283 -1941.63641500000000000000000000000 1426.0153450000003 2016.6902410511484 2 -618 -4033 -4651 -2325.5000 1707.5 2414.7696577520596
+253665376 1024 9767.00541 -9779.54865 -347484.08192 -339.33992375000000000000000000000 5708.956347957812 5711.745967644425 1024 11698 -11713 -416183 -406.4287 6837.6426468206855 6840.983786842613
+528534767 1022 9777.75676 -9777.15946 -16711.67771 -16.35193513698630136986301369863 5555.762110793133 5558.482190324906 1024 6984454 -11710 13948890 13621.9629 308443.0982329696 308593.815612222
+626923679 1024 9723.40270 -9778.95135 10541.05247 10.29399655273437500000000000000 5742.091453325365 5744.897264122335 1024 11646 -11712 12641 12.3447 6877.306686989158 6880.6672084147185
+6981 2 -515.62107 -515.62107 -1031.24214 -515.62107000000000000000000000000 0.0 0.0 3 6984454 -618 6983218 2327739.3333 3292794.518850853 4032833.1995089175
+762 1 1531.21941 1531.21941 1531.21941 1531.21941000000000000000000000000 0.0 NULL 2 6984454 1834 6986288 3493144.0000 3491310.0 4937457.95244881
+NULL 3072 9318.43514 -4298.15135 5018444.11392 1633.60811000000000000000000000000 5695.4830839098695 5696.410309489299 3072 11161 -5148 6010880 1956.6667 6821.647911041892 6822.758476439734
PREHOOK: query: SELECT SUM(HASH(*))
FROM (SELECT cint,
COUNT(cdecimal1), MAX(cdecimal1), MIN(cdecimal1), SUM(cdecimal1), AVG(cdecimal1), STDDEV_POP(cdecimal1), STDDEV_SAMP(cdecimal1),
@@ -767,4 +825,4 @@ FROM (SELECT cint,
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_vgby_small
#### A masked pattern was here ####
-91757235680
+96966670826
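Note (not part of the generated q.out): the reduce-side Select Operators above rebuild the original aggregates from the merged partials using var_pop = (sum(x*x) - sum(x)^2/n) / n and var_samp with n - 1 in the denominator (the CASE WHEN (count = 1L) THEN null guard avoids dividing by zero), and stddev is power(variance, 0.5). The changed result rows and the new SUM(HASH(*)) value are consistent with that: averages now come from a higher-scale decimal division and variances from double partials. A small hedged Java sketch mirroring the plan expressions:

// Hypothetical helper mirroring the reduce-side Select Operator expressions above.
final class VarianceFinal {
  static double stddevPop(double sumSquares, double sum, long n) {
    return Math.pow((sumSquares - (sum * sum) / n) / n, 0.5);
  }

  static Double stddevSamp(double sumSquares, double sum, long n) {
    if (n == 1) {
      return null; // mirrors CASE WHEN ((count = 1L)) THEN (null) in the plan
    }
    return Math.pow((sumSquares - (sum * sum) / n) / (n - 1), 0.5);
  }
}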
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
index 18b903b3c3..50e4305b2e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out
@@ -586,9 +586,9 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: avg(dec), sum(dec)
+ aggregations: sum(dec), count(dec)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10)
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -596,7 +596,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -605,8 +605,8 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -636,14 +636,14 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 2
- dataColumns: VALUE._col0:struct, VALUE._col1:decimal(30,10)
+ dataColumns: VALUE._col0:decimal(30,10), VALUE._col1:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimalFinal(col 0:struct) -> decimal(24,14), VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10)
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 1:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
@@ -651,17 +651,26 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
+ Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 4]
+ selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(30,10), col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 2:decimal(19,0)) -> 3:decimal(38,18), CastDecimalToDecimal(col 0:decimal(30,10)) -> 4:decimal(30,10)
Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -677,7 +686,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_precision
#### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@decimal_precision
@@ -1196,9 +1205,9 @@ STAGE PLANS:
projectedOutputColumnNums: [0]
Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(dec), sum(dec)
+ aggregations: sum(dec), count(dec)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct, VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10)
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
native: false
@@ -1206,7 +1215,7 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
@@ -1215,8 +1224,8 @@ STAGE PLANS:
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumnNums: [0, 1]
- Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: struct), _col1 (type: decimal(30,10))
+ Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: decimal(30,10)), _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -1247,14 +1256,14 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 2
- dataColumns: VALUE._col0:struct, VALUE._col1:decimal(30,10)
+ dataColumns: VALUE._col0:decimal(30,10), VALUE._col1:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimalFinal(col 0:struct) -> decimal(24,14), VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10)
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 1:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
native: false
@@ -1262,17 +1271,26 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
- Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: (_col0 / _col1) (type: decimal(38,18)), CAST( _col0 AS decimal(30,10)) (type: decimal(30,10))
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [3, 4]
+ selectExpressions: DecimalColDivideDecimalColumn(col 0:decimal(30,10), col 2:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 2:decimal(19,0)) -> 3:decimal(38,18), CastDecimalToDecimal(col 0:decimal(30,10)) -> 4:decimal(30,10)
+ Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 232 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -1288,7 +1306,7 @@ POSTHOOK: query: SELECT avg(`dec`), sum(`dec`) FROM DECIMAL_PRECISION_txt_small
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_precision_txt_small
#### A masked pattern was here ####
-88499534.57586576220645 2743485571.8518386284
+88499534.575865762206451613 2743485571.8518386284
PREHOOK: query: SELECT `dec` * cast('12345678901234567890.12345678' as decimal(38,18)) FROM DECIMAL_PRECISION_txt_small LIMIT 1
PREHOOK: type: QUERY
PREHOOK: Input: default@decimal_precision_txt_small
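Note (not part of the generated q.out): in vector_decimal_precision.q.out the avg(dec) aggregate is likewise rewritten to sum(dec) / count(dec); the division is typed decimal(38,18), which is why the expected average gains trailing digits (88499534.57586576220645 becomes 88499534.575865762206451613). A small hedged Java illustration of that division; the non-NULL row count of 31 is inferred from the two golden values, not read from the q.out:

import java.math.BigDecimal;
import java.math.RoundingMode;

// Hypothetical illustration: avg computed as sum/count at scale 18,
// as in the rewritten plan's decimal(38,18) division.
public final class AvgFromSumCount {
  public static void main(String[] args) {
    BigDecimal sum = new BigDecimal("2743485571.8518386284"); // sum(dec) from the q.out
    BigDecimal count = new BigDecimal(31);                    // inferred non-NULL count (assumption)
    System.out.println(sum.divide(count, 18, RoundingMode.HALF_UP));
    // prints 88499534.575865762206451613
  }
}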
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
index 59b3c4a017..c6867f882b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out
@@ -2297,19 +2297,19 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(key), count(key), avg(key)
+ aggregations: sum(key), count(key)
Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint, VectorUDAFAvgDecimal(col 0:decimal(20,10)) -> struct
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(20,10)) -> decimal(30,10), VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 1:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1]
keys: value (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 18 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -2319,9 +2319,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
- Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct)
+ valueColumnNums: [1, 2]
+ Statistics: Num rows: 18 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -2350,33 +2350,33 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY._col0:int, VALUE._col0:decimal(30,10), VALUE._col1:bigint, VALUE._col2:struct
+ dataColumnCount: 3
+ dataColumns: KEY._col0:int, VALUE._col0:decimal(30,10), VALUE._col1:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(24,14)
+ aggregators: VectorUDAFSumDecimal(col 1:decimal(30,10)) -> decimal(30,10), VectorUDAFCountMerge(col 2:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:int
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 17 Data size: 4012 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 17 Data size: 2108 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10))
+ expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), (CAST( _col1 AS decimal(24,14)) / _col2) (type: decimal(38,28)), _col1 (type: decimal(30,10))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 5, 3, 1]
- selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(30,10), col 4:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,18)
+ projectedOutputColumnNums: [0, 4, 6, 1]
+ selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(30,10), col 3:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 4:decimal(38,18), DecimalColDivideDecimalColumn(col 5:decimal(24,14), col 3:decimal(19,0))(children: CastDecimalToDecimal(col 1:decimal(30,10)) -> 5:decimal(24,14), CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 6:decimal(38,28)
Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -2386,9 +2386,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 3, 1]
+ valueColumnNums: [4, 6, 1]
Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10))
+ value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(38,28)), _col3 (type: decimal(30,10))
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -2401,12 +2401,12 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18), VALUE._col1:decimal(24,14), VALUE._col2:decimal(30,10)
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,18), VALUE._col1:decimal(38,28), VALUE._col2:decimal(30,10)
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10))
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(38,28)), VALUE._col2 (type: decimal(30,10))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
@@ -2438,23 +2438,23 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_udf
#### A masked pattern was here ####
--1 -1.120000000000000000 -1.12000000000000 -2.2400000000
--11 -1.122000000000000000 -1.12200000000000 -1.1220000000
--1234567890 -1234567890.123456789000000000 -1234567890.12345678900000 -1234567890.1234567890
--1255 -1255.490000000000000000 -1255.49000000000000 -1255.4900000000
-0 0.025384615384615385 0.02538461538462 0.3300000000
-1 1.048400000000000000 1.04840000000000 5.2420000000
-10 10.000000000000000000 10.00000000000000 10.0000000000
-100 100.000000000000000000 100.00000000000000 100.0000000000
-1234567890 1234567890.123456780000000000 1234567890.12345678000000 1234567890.1234567800
-124 124.000000000000000000 124.00000000000000 124.0000000000
-125 125.200000000000000000 125.20000000000000 125.2000000000
-2 2.000000000000000000 2.00000000000000 4.0000000000
-20 20.000000000000000000 20.00000000000000 20.0000000000
-200 200.000000000000000000 200.00000000000000 200.0000000000
-3 3.140000000000000000 3.14000000000000 9.4200000000
-4 3.140000000000000000 3.14000000000000 3.1400000000
-4400 -4400.000000000000000000 -4400.00000000000000 -4400.0000000000
+-1 -1.120000000000000000 -1.1200000000000000000000000000 -2.2400000000
+-11 -1.122000000000000000 -1.1220000000000000000000000000 -1.1220000000
+-1234567890 -1234567890.123456789000000000 -1234567890.1234567890000000000000000000 -1234567890.1234567890
+-1255 -1255.490000000000000000 -1255.4900000000000000000000000000 -1255.4900000000
+0 0.025384615384615385 0.0253846153846153846153846154 0.3300000000
+1 1.048400000000000000 1.0484000000000000000000000000 5.2420000000
+10 10.000000000000000000 10.0000000000000000000000000000 10.0000000000
+100 100.000000000000000000 100.0000000000000000000000000000 100.0000000000
+1234567890 1234567890.123456780000000000 1234567890.1234567800000000000000000000 1234567890.1234567800
+124 124.000000000000000000 124.0000000000000000000000000000 124.0000000000
+125 125.200000000000000000 125.2000000000000000000000000000 125.2000000000
+2 2.000000000000000000 2.0000000000000000000000000000 4.0000000000
+20 20.000000000000000000 20.0000000000000000000000000000 20.0000000000
+200 200.000000000000000000 200.0000000000000000000000000000 200.0000000000
+3 3.140000000000000000 3.1400000000000000000000000000 9.4200000000
+4 3.140000000000000000 3.1400000000000000000000000000 3.1400000000
+4400 -4400.000000000000000000 -4400.0000000000000000000000000000 -4400.0000000000
NULL NULL NULL NULL
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT -key FROM DECIMAL_UDF
@@ -3229,27 +3229,28 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
Select Operator
- expressions: key (type: decimal(20,10)), value (type: int)
- outputColumnNames: key, value
+ expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [1, 0, 3, 6]
+ selectExpressions: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 4:double, CastDecimalToDouble(col 0:decimal(20,10)) -> 5:double) -> 6:double
Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: stddev(key), variance(key)
+ aggregations: sum(_col3), sum(_col2), count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: stddev, VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: variance
+ aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 1:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
- keys: value (type: int)
+ projectedOutputColumnNums: [0, 1, 2]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -3259,9 +3260,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2]
- Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ valueColumnNums: [1, 2, 3]
+ Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -3278,28 +3279,56 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: key:decimal(20,10), value:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: stddev(VALUE._col0), variance(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 17 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4, 6]
+ selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 6:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 6:double) -> 5:double) -> 6:double
Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -3319,8 +3348,8 @@ POSTHOOK: Input: default@decimal_udf
-11 0.0 0.0
-1234567890 0.0 0.0
-1255 0.0 0.0
-0 0.22561046704494161 0.050900082840236685
-1 0.05928102563215321 0.0035142400000000066
+0 0.22561046704494161 0.05090008284023669
+1 0.05928102563215448 0.003514240000000157
10 0.0 0.0
100 0.0 0.0
1234567890 0.0 0.0
@@ -3364,27 +3393,28 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:key:decimal(20,10), 1:value:int, 2:ROW__ID:struct]
Select Operator
- expressions: key (type: decimal(20,10)), value (type: int)
- outputColumnNames: key, value
+ expressions: value (type: int), key (type: decimal(20,10)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [1, 0, 3, 6]
+ selectExpressions: CastDecimalToDouble(col 0:decimal(20,10)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(20,10)) -> 4:double, CastDecimalToDouble(col 0:decimal(20,10)) -> 5:double) -> 6:double
Statistics: Num rows: 39 Data size: 4412 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: stddev_samp(key), var_samp(key)
+ aggregations: sum(_col3), sum(_col2), count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: stddev_samp, VectorUDAFVarDecimal(col 0:decimal(20,10)) -> struct aggregation: var_samp
+ aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(20,10)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 1:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
- keys: value (type: int)
+ projectedOutputColumnNums: [0, 1, 2]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -3394,9 +3424,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2]
- Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ valueColumnNums: [1, 2, 3]
+ Statistics: Num rows: 18 Data size: 504 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -3413,28 +3443,56 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: key:decimal(20,10), value:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 17 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4, 9]
+ selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double, IfExprNullCondExpr(col 6:boolean, null, col 7:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 6:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 7:bigint) -> 8:bigint) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 9:double) -> 5:double, IfExprNullCondExpr(col 8:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 9:double
Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 17 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -3454,8 +3512,8 @@ POSTHOOK: Input: default@decimal_udf
-11 NULL NULL
-1234567890 NULL NULL
-1255 NULL NULL
-0 0.2348228191855647 0.055141756410256405
-1 0.06627820154470102 0.004392800000000008
+0 0.23482281918556472 0.05514175641025642
+1 0.06627820154470243 0.0043928000000001965
10 NULL NULL
100 NULL NULL
1234567890 NULL NULL
@@ -6235,18 +6293,18 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1]
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: sum(key), count(key), avg(key)
+ aggregations: sum(key), count(key)
Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 0:decimal(15,3)) -> decimal(25,3), VectorUDAFCount(col 0:decimal(15,3)) -> bigint, VectorUDAFAvgDecimal(col 0:decimal(15,3)) -> struct
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(15,3)) -> decimal(25,3), VectorUDAFCount(col 0:decimal(15,3)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 1:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1]
keys: value (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -6257,9 +6315,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumnNums: [1, 2]
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint), _col3 (type: struct)
+ value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -6289,33 +6347,33 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY._col0:int, VALUE._col0:decimal(25,3), VALUE._col1:bigint, VALUE._col2:struct
+ dataColumnCount: 3
+ dataColumns: KEY._col0:int, VALUE._col0:decimal(25,3), VALUE._col1:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), count(VALUE._col1), avg(VALUE._col2)
+ aggregations: sum(VALUE._col0), count(VALUE._col1)
Group By Vectorization:
- aggregators: VectorUDAFSumDecimal(col 1:decimal(25,3)) -> decimal(25,3), VectorUDAFCountMerge(col 2:bigint) -> bigint, VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(19,7)
+ aggregators: VectorUDAFSumDecimal(col 1:decimal(25,3)) -> decimal(25,3), VectorUDAFCountMerge(col 2:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:int
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1, 2]
+ projectedOutputColumnNums: [0, 1]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,16)), _col3 (type: decimal(19,7)), _col1 (type: decimal(25,3))
+ expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,16)), (CAST( _col1 AS decimal(19,7)) / _col2) (type: decimal(38,26)), _col1 (type: decimal(25,3))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 5, 3, 1]
- selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(25,3), col 4:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 4:decimal(19,0)) -> 5:decimal(38,16)
+ projectedOutputColumnNums: [0, 4, 6, 1]
+ selectExpressions: DecimalColDivideDecimalColumn(col 1:decimal(25,3), col 3:decimal(19,0))(children: CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 4:decimal(38,16), DecimalColDivideDecimalColumn(col 5:decimal(19,7), col 3:decimal(19,0))(children: CastDecimalToDecimal(col 1:decimal(25,3)) -> 5:decimal(19,7), CastLongToDecimal(col 2:bigint) -> 3:decimal(19,0)) -> 6:decimal(38,26)
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -6325,9 +6383,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 3, 1]
+ valueColumnNums: [4, 6, 1]
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(19,7)), _col3 (type: decimal(25,3))
+ value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(38,26)), _col3 (type: decimal(25,3))
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -6340,12 +6398,12 @@ STAGE PLANS:
vectorized: true
rowBatchContext:
dataColumnCount: 4
- dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,16), VALUE._col1:decimal(19,7), VALUE._col2:decimal(25,3)
+ dataColumns: KEY.reducesinkkey0:int, VALUE._col0:decimal(38,16), VALUE._col1:decimal(38,26), VALUE._col2:decimal(25,3)
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,16)), VALUE._col1 (type: decimal(19,7)), VALUE._col2 (type: decimal(25,3))
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,16)), VALUE._col1 (type: decimal(38,26)), VALUE._col2 (type: decimal(25,3))
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
@@ -6377,23 +6435,23 @@ POSTHOOK: query: SELECT value, sum(key) / count(key), avg(key), sum(key) FROM DE
POSTHOOK: type: QUERY
POSTHOOK: Input: default@decimal_udf_txt_small
#### A masked pattern was here ####
--1 -1.1200000000000000 -1.1200000 -2.240
--11 -1.1220000000000000 -1.1220000 -1.122
--1234567890 -1234567890.1230000000000000 -1234567890.1230000 -1234567890.123
--1255 -1255.4900000000000000 -1255.4900000 -1255.490
-0 0.0253846153846154 0.0253846 0.330
-1 1.0484000000000000 1.0484000 5.242
-10 10.0000000000000000 10.0000000 10.000
-100 100.0000000000000000 100.0000000 100.000
-1234567890 1234567890.1230000000000000 1234567890.1230000 1234567890.123
-124 124.0000000000000000 124.0000000 124.000
-125 125.2000000000000000 125.2000000 125.200
-2 2.0000000000000000 2.0000000 4.000
-20 20.0000000000000000 20.0000000 20.000
-200 200.0000000000000000 200.0000000 200.000
-3 3.1400000000000000 3.1400000 9.420
-4 3.1400000000000000 3.1400000 3.140
-4400 -4400.0000000000000000 -4400.0000000 -4400.000
+-1 -1.1200000000000000 -1.12000000000000000000000000 -2.240
+-11 -1.1220000000000000 -1.12200000000000000000000000 -1.122
+-1234567890 -1234567890.1230000000000000 -1234567890.12300000000000000000000000 -1234567890.123
+-1255 -1255.4900000000000000 -1255.49000000000000000000000000 -1255.490
+0 0.0253846153846154 0.02538461538461538461538462 0.330
+1 1.0484000000000000 1.04840000000000000000000000 5.242
+10 10.0000000000000000 10.00000000000000000000000000 10.000
+100 100.0000000000000000 100.00000000000000000000000000 100.000
+1234567890 1234567890.1230000000000000 1234567890.12300000000000000000000000 1234567890.123
+124 124.0000000000000000 124.00000000000000000000000000 124.000
+125 125.2000000000000000 125.20000000000000000000000000 125.200
+2 2.0000000000000000 2.00000000000000000000000000 4.000
+20 20.0000000000000000 20.00000000000000000000000000 20.000
+200 200.0000000000000000 200.00000000000000000000000000 200.000
+3 3.1400000000000000 3.14000000000000000000000000 9.420
+4 3.1400000000000000 3.14000000000000000000000000 3.140
+4400 -4400.0000000000000000 -4400.00000000000000000000000000 -4400.000
NULL NULL NULL NULL
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT -key FROM DECIMAL_UDF_txt_small
@@ -7173,26 +7231,27 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:key:decimal(15,3), 1:value:int, 2:ROW__ID:struct]
Select Operator
- expressions: key (type: decimal(15,3)), value (type: int)
- outputColumnNames: key, value
+ expressions: value (type: int), key (type: decimal(15,3)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [1, 0, 3, 6]
+ selectExpressions: CastDecimalToDouble(col 0:decimal(15,3)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(15,3)) -> 4:double, CastDecimalToDouble(col 0:decimal(15,3)) -> 5:double) -> 6:double
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev(key), variance(key)
+ aggregations: sum(_col3), sum(_col2), count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: stddev, VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: variance
+ aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(15,3)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 1:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
- keys: value (type: int)
+ projectedOutputColumnNums: [0, 1, 2]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -7203,9 +7262,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2]
+ valueColumnNums: [1, 2, 3]
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -7223,28 +7282,56 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: key:decimal(15,3), value:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdEvaluator
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: stddev(VALUE._col0), variance(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / _col3), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / _col3) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4, 6]
+ selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 6:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 6:double) -> 5:double) -> 6:double
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -7264,8 +7351,8 @@ POSTHOOK: Input: default@decimal_udf_txt_small
-11 0.0 0.0
-1234567890 0.0 0.0
-1255 0.0 0.0
-0 0.22561046704494161 0.050900082840236685
-1 0.05928102563215321 0.0035142400000000066
+0 0.22561046704494161 0.05090008284023669
+1 0.05928102563215448 0.003514240000000157
10 0.0 0.0
100 0.0 0.0
1234567890 0.0 0.0
@@ -7309,26 +7396,27 @@ STAGE PLANS:
native: true
vectorizationSchemaColumns: [0:key:decimal(15,3), 1:value:int, 2:ROW__ID:struct]
Select Operator
- expressions: key (type: decimal(15,3)), value (type: int)
- outputColumnNames: key, value
+ expressions: value (type: int), key (type: decimal(15,3)), UDFToDouble(key) (type: double), (UDFToDouble(key) * UDFToDouble(key)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [1, 0, 3, 6]
+ selectExpressions: CastDecimalToDouble(col 0:decimal(15,3)) -> 3:double, DoubleColMultiplyDoubleColumn(col 4:double, col 5:double)(children: CastDecimalToDouble(col 0:decimal(15,3)) -> 4:double, CastDecimalToDouble(col 0:decimal(15,3)) -> 5:double) -> 6:double
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: stddev_samp(key), var_samp(key)
+ aggregations: sum(_col3), sum(_col2), count(_col1)
Group By Vectorization:
- aggregators: VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: stddev_samp, VectorUDAFVarDecimal(col 0:decimal(15,3)) -> struct aggregation: var_samp
+ aggregators: VectorUDAFSumDouble(col 6:double) -> double, VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCount(col 0:decimal(15,3)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 1:int
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
- keys: value (type: int)
+ projectedOutputColumnNums: [0, 1, 2]
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
@@ -7339,9 +7427,9 @@ STAGE PLANS:
keyColumnNums: [0]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2]
+ valueColumnNums: [1, 2, 3]
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct), _col2 (type: struct)
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
Map Vectorization:
@@ -7359,28 +7447,56 @@ STAGE PLANS:
includeColumns: [0, 1]
dataColumns: key:decimal(15,3), value:int
partitionColumnCount: 0
- scratchColumnTypeNames: []
+ scratchColumnTypeNames: [double, double, double, double]
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- notVectorizedReason: GROUPBY operator: Vector aggregation : "stddev_samp" for input type: "STRUCT" and output type: "DOUBLE" and mode: FINAL not supported for evaluator GenericUDAFStdSampleEvaluator
- vectorized: false
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 4
+ dataColumns: KEY._col0:int, VALUE._col0:double, VALUE._col1:double, VALUE._col2:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: stddev_samp(VALUE._col0), var_samp(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1), count(VALUE._col2)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDouble(col 1:double) -> double, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ keyExpressions: col 0:int
+ native: false
+ vectorProcessingMode: MERGE_PARTIAL
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2
+ outputColumnNames: _col0, _col1, _col2, _col3
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: int), power(((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), ((_col1 - ((_col2 * _col2) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 4, 9]
+ selectExpressions: FuncPowerDoubleToDouble(col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 5:double)(children: DoubleColDivideLongColumn(col 4:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 4:double) -> 5:double) -> 4:double, IfExprNullCondExpr(col 6:boolean, null, col 7:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 6:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 7:bigint) -> 8:bigint) -> 5:double) -> 4:double, DoubleColDivideLongColumn(col 5:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 1:double, col 9:double)(children: DoubleColDivideLongColumn(col 5:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 2:double, col 2:double) -> 5:double) -> 9:double) -> 5:double, IfExprNullCondExpr(col 8:boolean, null, col 10:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 8:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 10:bigint) -> 11:bigint) -> 9:double
Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -7400,8 +7516,8 @@ POSTHOOK: Input: default@decimal_udf_txt_small
-11 NULL NULL
-1234567890 NULL NULL
-1255 NULL NULL
-0 0.2348228191855647 0.055141756410256405
-1 0.06627820154470102 0.004392800000000008
+0 0.23482281918556472 0.05514175641025642
+1 0.06627820154470243 0.0043928000000001965
10 NULL NULL
100 NULL NULL
1234567890 NULL NULL
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
index 80ecd59a16..0eca143451 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out
@@ -69,24 +69,24 @@ STAGE PLANS:
outputColumnNames: a, b, c
Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c), count()
+ aggregations: sum(c), count(c), count()
keys: a (type: string), b (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint)
Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: GROUPBY operator: Vector aggregation : "avg" for input type: "BYTES" and output type: "STRUCT" and mode: PARTIAL1 not supported for evaluator GenericUDAFAverageEvaluatorDouble
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble
vectorized: false
Reducer 2
Execution mode: vectorized, llap
@@ -99,33 +99,34 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 5
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint
+ aggregators: VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:string, col 1:string, col 2:bigint
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
+ projectedOutputColumnNums: [0, 1, 5, 4]
+ selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 5:double
Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -177,24 +178,24 @@ STAGE PLANS:
outputColumnNames: a, b, c
Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c), count()
+ aggregations: sum(c), count(c), count()
keys: a (type: string), b (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
sort order: +++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint)
Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: GROUPBY operator: Vector aggregation : "avg" for input type: "BYTES" and output type: "STRUCT" and mode: PARTIAL1 not supported for evaluator GenericUDAFAverageEvaluatorDouble
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble
vectorized: false
Reducer 2
Execution mode: vectorized, llap
@@ -207,33 +208,34 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 5
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint
+ aggregators: VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:string, col 1:string, col 2:bigint
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
+ projectedOutputColumnNums: [0, 1, 5, 4]
+ selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 5:double
Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -311,24 +313,24 @@ STAGE PLANS:
outputColumnNames: a, b, c
Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c), count()
+ aggregations: sum(c), count(c), count()
keys: a (type: string), b (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
Statistics: Num rows: 12 Data size: 6624 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct), _col3 (type: bigint)
+ value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: bigint)
Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- notVectorizedReason: GROUPBY operator: Vector aggregation : "avg" for input type: "BYTES" and output type: "STRUCT" and mode: PARTIAL1 not supported for evaluator GenericUDAFAverageEvaluatorDouble
+ notVectorizedReason: GROUPBY operator: Vector aggregation : "sum" for input type: "BYTES" and output type: "DOUBLE" and mode: PARTIAL1 not supported for evaluator GenericUDAFSumDouble
vectorized: false
Reducer 2
Execution mode: vectorized, llap
@@ -341,24 +343,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 5
+ dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgPartial2(col 2:struct) -> struct, VectorUDAFCountMerge(col 3:bigint) -> bigint
+ aggregators: VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: PARTIALS
- keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
native: false
vectorProcessingMode: STREAMING
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), 0L (type: bigint)
mode: partials
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
@@ -369,9 +371,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4]
+ valueColumnNums: [3, 4, 5]
Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -383,33 +385,34 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 5
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:double, VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgFinal(col 3:struct) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint
+ aggregators: VectorUDAFSumDouble(col 3:double) -> double, VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: FINAL
keyExpressions: col 0:string, col 1:string, col 2:bigint
native: false
vectorProcessingMode: STREAMING
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: final
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: double), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
+ projectedOutputColumnNums: [0, 1, 5, 4]
+ selectExpressions: DoubleColDivideLongColumn(col 2:double, col 3:bigint) -> 5:double
Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out
index dabc987e6b..74caa3fee9 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out
@@ -76,18 +76,18 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c_dec), count()
+ aggregations: sum(c_dec), count(c_dec), count()
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 2:decimal(10,2)) -> decimal(20,2), VectorUDAFCount(col 2:decimal(10,2)) -> bigint, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: a (type: string), b (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
@@ -98,9 +98,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4]
+ valueColumnNums: [3, 4, 5]
Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -129,33 +129,34 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 5
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 3:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:string, col 1:string, col 2:bigint
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: decimal(38,20)), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
+ projectedOutputColumnNums: [0, 1, 6, 4]
+ selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(20,2), col 5:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 5:decimal(19,0)) -> 6:decimal(38,20)
Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -214,18 +215,18 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c_dec), count()
+ aggregations: sum(c_dec), count(c_dec), count()
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 2:decimal(10,2)) -> decimal(20,2), VectorUDAFCount(col 2:decimal(10,2)) -> bigint, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: a (type: string), b (type: string), 0L (type: bigint)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
@@ -236,9 +237,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4]
+ valueColumnNums: [3, 4, 5]
Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -267,33 +268,34 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 5
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 3:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: MERGEPARTIAL
keyExpressions: col 0:string, col 1:string, col 2:bigint
native: false
vectorProcessingMode: MERGE_PARTIAL
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: decimal(38,20)), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
+ projectedOutputColumnNums: [0, 1, 6, 4]
+ selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(20,2), col 5:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 5:decimal(19,0)) -> 6:decimal(38,20)
Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -321,22 +323,22 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
a b _c2 _c3
-1 1 3.000000 2
-1 2 2.000000 1
-1 NULL 2.666667 3
-2 2 5.333333 3
-2 3 5.000000 2
-2 NULL 5.200000 5
-3 2 8.000000 1
-3 NULL 8.000000 1
-5 1 2.000000 1
-5 NULL 2.000000 1
-8 1 1.000000 2
-8 NULL 1.000000 2
-NULL 1 2.000000 5
-NULL 2 5.200000 5
-NULL 3 5.000000 2
-NULL NULL 3.833333 12
+1 1 3.00000000000000000000 2
+1 2 2.00000000000000000000 1
+1 NULL 2.66666666666666666667 3
+2 2 5.33333333333333333333 3
+2 3 5.00000000000000000000 2
+2 NULL 5.20000000000000000000 5
+3 2 8.00000000000000000000 1
+3 NULL 8.00000000000000000000 1
+5 1 2.00000000000000000000 1
+5 NULL 2.00000000000000000000 1
+8 1 1.00000000000000000000 2
+8 NULL 1.00000000000000000000 2
+NULL 1 2.00000000000000000000 5
+NULL 2 5.20000000000000000000 5
+NULL 3 5.00000000000000000000 2
+NULL NULL 3.83333333333333333333 12
PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
SELECT a, b, avg(c_dec), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
@@ -378,18 +380,18 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2]
Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: avg(c_dec), count()
+ aggregations: sum(c_dec), count(c_dec), count()
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimal(col 2:decimal(10,2)) -> struct, VectorUDAFCountStar(*) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 2:decimal(10,2)) -> decimal(20,2), VectorUDAFCount(col 2:decimal(10,2)) -> bigint, VectorUDAFCountStar(*) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: col 0:string, col 1:string
native: false
vectorProcessingMode: HASH
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: a (type: string), b (type: string)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string)
@@ -400,9 +402,9 @@ STAGE PLANS:
keyColumnNums: [0, 1]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [2, 3]
+ valueColumnNums: [2, 3, 4]
Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col2 (type: struct), _col3 (type: bigint)
+ value expressions: _col2 (type: decimal(20,2)), _col3 (type: bigint), _col4 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
@@ -431,24 +433,24 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 4
- dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 5
+ dataColumns: KEY._col0:string, KEY._col1:string, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: [bigint]
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimalPartial2(col 2:struct) -> struct, VectorUDAFCountMerge(col 3:bigint) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 2:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 3:bigint) -> bigint, VectorUDAFCountMerge(col 4:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: PARTIALS
- keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 4:bigint
+ keyExpressions: col 0:string, col 1:string, ConstantVectorExpression(val 0) -> 5:bigint
native: false
vectorProcessingMode: STREAMING
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), 0L (type: bigint)
mode: partials
- outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint)
@@ -459,9 +461,9 @@ STAGE PLANS:
keyColumnNums: [0, 1, 2]
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4]
+ valueColumnNums: [3, 4, 5]
Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col3 (type: struct), _col4 (type: bigint)
+ value expressions: _col3 (type: decimal(20,2)), _col4 (type: bigint), _col5 (type: bigint)
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -473,33 +475,34 @@ STAGE PLANS:
usesVectorUDFAdaptor: false
vectorized: true
rowBatchContext:
- dataColumnCount: 5
- dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:struct, VALUE._col1:bigint
+ dataColumnCount: 6
+ dataColumns: KEY._col0:string, KEY._col1:string, KEY._col2:bigint, VALUE._col0:decimal(20,2), VALUE._col1:bigint, VALUE._col2:bigint
partitionColumnCount: 0
scratchColumnTypeNames: []
Reduce Operator Tree:
Group By Operator
- aggregations: avg(VALUE._col0), count(VALUE._col1)
+ aggregations: sum(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
Group By Vectorization:
- aggregators: VectorUDAFAvgDecimalFinal(col 3:struct) -> decimal(14,6), VectorUDAFCountMerge(col 4:bigint) -> bigint
+ aggregators: VectorUDAFSumDecimal(col 3:decimal(20,2)) -> decimal(20,2), VectorUDAFCountMerge(col 4:bigint) -> bigint, VectorUDAFCountMerge(col 5:bigint) -> bigint
className: VectorGroupByOperator
groupByMode: FINAL
keyExpressions: col 0:string, col 1:string, col 2:bigint
native: false
vectorProcessingMode: STREAMING
- projectedOutputColumnNums: [0, 1]
+ projectedOutputColumnNums: [0, 1, 2]
keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint)
mode: final
- outputColumnNames: _col0, _col1, _col3, _col4
+ outputColumnNames: _col0, _col1, _col3, _col4, _col5
Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE
pruneGroupingSetId: true
Select Operator
- expressions: _col0 (type: string), _col1 (type: string), _col3 (type: decimal(14,6)), _col4 (type: bigint)
+ expressions: _col0 (type: string), _col1 (type: string), (_col3 / _col4) (type: decimal(38,20)), _col5 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3
Select Vectorization:
className: VectorSelectOperator
native: true
- projectedOutputColumnNums: [0, 1, 2, 3]
+ projectedOutputColumnNums: [0, 1, 6, 4]
+ selectExpressions: DecimalColDivideDecimalColumn(col 2:decimal(20,2), col 5:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 5:decimal(19,0)) -> 6:decimal(38,20)
Statistics: Num rows: 24 Data size: 11520 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
@@ -527,19 +530,19 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
a b _c2 _c3
-1 1 3.000000 2
-1 2 2.000000 1
-1 NULL 2.666667 3
-2 2 5.333333 3
-2 3 5.000000 2
-2 NULL 5.200000 5
-3 2 8.000000 1
-3 NULL 8.000000 1
-5 1 2.000000 1
-5 NULL 2.000000 1
-8 1 1.000000 2
-8 NULL 1.000000 2
-NULL 1 2.000000 5
-NULL 2 5.200000 5
-NULL 3 5.000000 2
-NULL NULL 3.833333 12
+1 1 3.00000000000000000000 2
+1 2 2.00000000000000000000 1
+1 NULL 2.66666666666666666667 3
+2 2 5.33333333333333333333 3
+2 3 5.00000000000000000000 2
+2 NULL 5.20000000000000000000 5
+3 2 8.00000000000000000000 1
+3 NULL 8.00000000000000000000 1
+5 1 2.00000000000000000000 1
+5 NULL 2.00000000000000000000 1
+8 1 1.00000000000000000000 2
+8 NULL 1.00000000000000000000 2
+NULL 1 2.00000000000000000000 5
+NULL 2 5.20000000000000000000 5
+NULL 3 5.00000000000000000000 2
+NULL NULL 3.83333333333333333333 12
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index c21a4fc091..d90ebf0634 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -822,9 +822,9 @@ STAGE PLANS:
projectedOutputColumnNums: [0, 1, 2, 3]
Statistics: Num rows: 6 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_col1), sum(_col2), avg(_col2), sum(_col3), avg(_col3), sum(_col4), avg(_col4)
+ aggregations: sum(_col1), sum(_col2), count(_col2), sum(_col3), count(_col3), sum(_col4), count(_col4)
Group By Vectorization:
- aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFSumLong(col 1:int) -> bigint, VectorUDAFAvgLong(col 1:int) -> struct, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFAvgDouble(col 2:double) -> struct, VectorUDAFSumDecimal(col 3:decimal(38,18)) -> decimal(38,18), VectorUDAFAvgDecimal(col 3:decimal(38,18)) -> struct
+ aggregators: VectorUDAFSumLong(col 0:int) -> bigint, VectorUDAFSumLong(col 1:int) -> bigint, VectorUDAFCount(col 1:int) -> bigint, VectorUDAFSumDouble(col 2:double) -> double, VectorUDAFCount(col 2:double) -> bigint, VectorUDAFSumDecimal(col 3:decimal(38,18)) -> decimal(38,18), VectorUDAFCount(col 3:decimal(38,18)) -> bigint
className: VectorGroupByOperator
groupByMode: HASH
keyExpressions: ConstantVectorExpression(val 1) -> 4:int
@@ -834,7 +834,7 @@ STAGE PLANS:
keys: 1 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
@@ -843,8 +843,8 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1 Data size: 584 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: struct), _col4 (type: double), _col5 (type: struct), _col6 (type: decimal(38,18)), _col7 (type: struct