diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java
new file mode 100644
index 0000000000..9298e51ebf
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlVarianceAggFunction.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.functions;
+
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+/**
+ * Aggregation function to represent: stddev_pop, stddev_samp, var_pop, var_samp.
+ */
+public class HiveSqlVarianceAggFunction extends SqlAggFunction {
+
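+ // the two trailing booleans passed to super below are requiresOrder and requiresOver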
+ public HiveSqlVarianceAggFunction(String name, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+ SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) {
+ super(name, null, kind, returnTypeInference, operandTypeInference,
+ operandTypeChecker, SqlFunctionCategory.NUMERIC, false, false);
+ assert kind == SqlKind.STDDEV_POP || kind == SqlKind.STDDEV_SAMP ||
+ kind == SqlKind.VAR_POP || kind == SqlKind.VAR_SAMP;
+ }
+
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
new file mode 100644
index 0000000000..e00317f2ac
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
@@ -0,0 +1,615 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.AggregateCall;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.calcite.rex.RexLiteral;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlAggFunction;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.SqlTypeUtil;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.CompositeList;
+import org.apache.calcite.util.ImmutableIntList;
+import org.apache.calcite.util.Util;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumEmptyIsZeroAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+
+import java.math.BigDecimal;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * This rule is a copy of {@link org.apache.calcite.rel.rules.AggregateReduceFunctionsRule}
+ * that regenerates Hive-specific aggregate operators.
+ *
+ * TODO: When CALCITE-2216 is completed, we should be able to remove much of this code and
+ * just override the relevant methods.
+ *
+ * Planner rule that reduces aggregate functions in
+ * {@link org.apache.calcite.rel.core.Aggregate}s to simpler forms.
+ */
+public class HiveAggregateReduceFunctionsRule extends RelOptRule {
+
+ /** The singleton. */
+ public static final HiveAggregateReduceFunctionsRule INSTANCE =
+ new HiveAggregateReduceFunctionsRule();
+
+ /** Creates a HiveAggregateReduceFunctionsRule. */
+ private HiveAggregateReduceFunctionsRule() {
+ super(operand(HiveAggregate.class, any()), HiveRelFactories.HIVE_BUILDER, null);
+ }
+
+ @Override
+ public boolean matches(RelOptRuleCall call) {
+ final Aggregate oldAggRel = call.rel(0);
+ return containsAvgStddevVarCall(oldAggRel.getAggCallList());
+ }
+
+ @Override
+ public void onMatch(RelOptRuleCall ruleCall) {
+ final Aggregate oldAggRel = ruleCall.rel(0);
+ reduceAggs(ruleCall, oldAggRel);
+ }
+
+ /**
+ * Returns whether any of the aggregate calls is reducible by this rule.
+ */
+ private boolean containsAvgStddevVarCall(List<AggregateCall> aggCallList) {
+ for (AggregateCall call : aggCallList) {
+ if (isReducible(call.getAggregation().getKind())) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns whether an aggregate function of the given kind can be reduced
+ * by this rule (AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP or SUM).
+ */
+ private boolean isReducible(final SqlKind kind) {
+ if (SqlKind.AVG_AGG_FUNCTIONS.contains(kind)) {
+ return true;
+ }
+ switch (kind) {
+ case SUM:
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Reduces all calls to AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP and VAR_SAMP
+ * in the aggregates list to simpler forms.
+ *
+ * <p>It handles newly generated common subexpressions, since this rewrite
+ * is no longer done at the sql2rel stage.
+ */
+ private void reduceAggs(
+ RelOptRuleCall ruleCall,
+ Aggregate oldAggRel) {
+ RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+
+ List<AggregateCall> oldCalls = oldAggRel.getAggCallList();
+ final int groupCount = oldAggRel.getGroupCount();
+ final int indicatorCount = oldAggRel.getIndicatorCount();
+
+ final List<AggregateCall> newCalls = Lists.newArrayList();
+ final Map<AggregateCall, RexNode> aggCallMapping = Maps.newHashMap();
+
+ final List<RexNode> projList = Lists.newArrayList();
+
+ // pass through group key (+ indicators if present)
+ for (int i = 0; i < groupCount + indicatorCount; ++i) {
+ projList.add(
+ rexBuilder.makeInputRef(
+ getFieldType(oldAggRel, i),
+ i));
+ }
+
+ // List of input expressions. If a particular aggregate needs more, it
+ // will add an expression to the end, and we will create an extra
+ // project.
+ final RelBuilder relBuilder = ruleCall.builder();
+ relBuilder.push(oldAggRel.getInput());
+ final List<RexNode> inputExprs = new ArrayList<>(relBuilder.fields());
+
+ // create new agg function calls and rest of project list together
+ for (AggregateCall oldCall : oldCalls) {
+ projList.add(
+ reduceAgg(
+ oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs));
+ }
+
+ final int extraArgCount =
+ inputExprs.size() - relBuilder.peek().getRowType().getFieldCount();
+ if (extraArgCount > 0) {
+ relBuilder.project(inputExprs,
+ CompositeList.of(
+ relBuilder.peek().getRowType().getFieldNames(),
+ Collections.nCopies(extraArgCount, null)));
+ }
+ newAggregateRel(relBuilder, oldAggRel, newCalls);
+ relBuilder.project(projList, oldAggRel.getRowType().getFieldNames());
+ ruleCall.transformTo(relBuilder.build());
+ }
+
+ private RexNode reduceAgg(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ List<AggregateCall> newCalls,
+ Map<AggregateCall, RexNode> aggCallMapping,
+ List<RexNode> inputExprs) {
+ final SqlKind kind = oldCall.getAggregation().getKind();
+ if (isReducible(kind)) {
+ switch (kind) {
+ case SUM:
+ // replace original SUM(x) with
+ // case COUNT(x) when 0 then null else SUM0(x) end
+ return reduceSum(oldAggRel, oldCall, newCalls, aggCallMapping);
+ case AVG:
+ // replace original AVG(x) with SUM(x) / COUNT(x)
+ return reduceAvg(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs);
+ case STDDEV_POP:
+ // replace original STDDEV_POP(x) with
+ // SQRT(
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / COUNT(x))
+ return reduceStddev(oldAggRel, oldCall, true, true, newCalls,
+ aggCallMapping, inputExprs);
+ case STDDEV_SAMP:
+ // replace original STDDEV_SAMP(x) with
+ // SQRT(
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END)
+ return reduceStddev(oldAggRel, oldCall, false, true, newCalls,
+ aggCallMapping, inputExprs);
+ case VAR_POP:
+ // replace original VAR_POP(x) with
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / COUNT(x)
+ return reduceStddev(oldAggRel, oldCall, true, false, newCalls,
+ aggCallMapping, inputExprs);
+ case VAR_SAMP:
+ // replace original VAR_SAMP(x) with
+ // (SUM(x * x) - SUM(x) * SUM(x) / COUNT(x))
+ // / CASE COUNT(x) WHEN 1 THEN NULL ELSE COUNT(x) - 1 END
+ return reduceStddev(oldAggRel, oldCall, false, false, newCalls,
+ aggCallMapping, inputExprs);
+ default:
+ throw Util.unexpected(kind);
+ }
+ } else {
+ // anything else: preserve original call
+ RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+ final int nGroups = oldAggRel.getGroupCount();
+ List<RelDataType> oldArgTypes =
+ SqlTypeUtil.projectTypes(
+ oldAggRel.getInput().getRowType(), oldCall.getArgList());
+ return rexBuilder.addAggCall(oldCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ oldArgTypes);
+ }
+ }
+
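+ /** Creates an aggregate call, inferring its return type from an explicitly
+ * supplied operand type (used for the derived x * x expression). */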
+ private AggregateCall createAggregateCallWithBinding(
+ RelDataTypeFactory typeFactory,
+ SqlAggFunction aggFunction,
+ RelDataType operandType,
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ int argOrdinal) {
+ final Aggregate.AggCallBinding binding =
+ new Aggregate.AggCallBinding(typeFactory, aggFunction,
+ ImmutableList.of(operandType), oldAggRel.getGroupCount(),
+ oldCall.filterArg >= 0);
+ return AggregateCall.create(aggFunction,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ ImmutableIntList.of(argOrdinal),
+ oldCall.filterArg,
+ aggFunction.inferReturnType(binding),
+ null);
+ }
+
+ private RexNode reduceAvg(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ List<AggregateCall> newCalls,
+ Map<AggregateCall, RexNode> aggCallMapping,
+ List<RexNode> inputExprs) {
+ final int nGroups = oldAggRel.getGroupCount();
+ final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+ final int iAvgInput = oldCall.getArgList().get(0);
+ final RelDataType avgInputType =
+ getFieldType(
+ oldAggRel.getInput(),
+ iAvgInput);
+ final AggregateCall sumCall =
+ AggregateCall.create(
+ new HiveSqlSumAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ null,
+ null);
+ final AggregateCall countCall =
+ AggregateCall.create(
+ new HiveSqlCountAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ null,
+ null);
+
+ // NOTE: these references are with respect to the output
+ // of newAggRel
+ RexNode numeratorRef =
+ rexBuilder.addAggCall(sumCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(avgInputType));
+ final RexNode denominatorRef =
+ rexBuilder.addAggCall(countCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(avgInputType));
+
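+ // give the numerator AVG's result type (with matching nullability) before building the division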
+ final RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
+ final RelDataType avgType = typeFactory.createTypeWithNullability(
+ oldCall.getType(), numeratorRef.getType().isNullable());
+ numeratorRef = rexBuilder.ensureType(avgType, numeratorRef, true);
+ final RexNode divideRef =
+ rexBuilder.makeCall(SqlStdOperatorTable.DIVIDE, numeratorRef, denominatorRef);
+ return rexBuilder.makeCast(oldCall.getType(), divideRef);
+ }
+
+ private RexNode reduceSum(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ List<AggregateCall> newCalls,
+ Map<AggregateCall, RexNode> aggCallMapping) {
+ final int nGroups = oldAggRel.getGroupCount();
+ RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+ int arg = oldCall.getArgList().get(0);
+ RelDataType argType =
+ getFieldType(
+ oldAggRel.getInput(),
+ arg);
+ final AggregateCall sumZeroCall =
+ AggregateCall.create(
+ new HiveSqlSumEmptyIsZeroAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM0,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ null,
+ oldCall.name);
+ final AggregateCall countCall =
+ AggregateCall.create(
+ new HiveSqlCountAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel,
+ null,
+ null);
+
+ // NOTE: these references are with respect to the output
+ // of newAggRel
+ RexNode sumZeroRef =
+ rexBuilder.addAggCall(sumZeroCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(argType));
+ if (!oldCall.getType().isNullable()) {
+ // If SUM(x) is not nullable, the validator must have determined that
+ // nulls are impossible (because the group is never empty and x is never
+ // null). Therefore we translate to SUM0(x).
+ return sumZeroRef;
+ }
+ RexNode countRef =
+ rexBuilder.addAggCall(countCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(argType));
+ return rexBuilder.makeCall(SqlStdOperatorTable.CASE,
+ rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
+ countRef, rexBuilder.makeExactLiteral(BigDecimal.ZERO)),
+ rexBuilder.makeCast(sumZeroRef.getType(), rexBuilder.constantNull()),
+ sumZeroRef);
+ }
+
+ private RexNode reduceStddev(
+ Aggregate oldAggRel,
+ AggregateCall oldCall,
+ boolean biased,
+ boolean sqrt,
+ List<AggregateCall> newCalls,
+ Map<AggregateCall, RexNode> aggCallMapping,
+ List<RexNode> inputExprs) {
+ // stddev_pop(x) ==>
+ // power(
+ // (sum(x * x) - sum(x) * sum(x) / count(x))
+ // / count(x),
+ // .5)
+ //
+ // stddev_samp(x) ==>
+ // power(
+ // (sum(x * x) - sum(x) * sum(x) / count(x))
+ // / nullif(count(x) - 1, 0),
+ // .5)
+ final int nGroups = oldAggRel.getGroupCount();
+ final RelOptCluster cluster = oldAggRel.getCluster();
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ final RelDataTypeFactory typeFactory = cluster.getTypeFactory();
+
+ assert oldCall.getArgList().size() == 1 : oldCall.getArgList();
+ final int argOrdinal = oldCall.getArgList().get(0);
+ final RelDataType argOrdinalType = getFieldType(oldAggRel.getInput(), argOrdinal);
+ final RelDataType oldCallType =
+ typeFactory.createTypeWithNullability(oldCall.getType(),
+ argOrdinalType.isNullable());
+
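+ // make x and x * x available as input expressions; lookupOrAdd may extend
+ // the project below the aggregate with the squared expression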
+ final RexNode argRef =
+ rexBuilder.ensureType(oldCallType, inputExprs.get(argOrdinal), true);
+ final int argRefOrdinal = lookupOrAdd(inputExprs, argRef);
+
+ final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY,
+ argRef, argRef);
+ final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);
+
+ final AggregateCall sumArgSquaredAggCall =
+ createAggregateCallWithBinding(typeFactory,
+ new HiveSqlSumAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
+ argSquared.getType(), oldAggRel, oldCall, argSquaredOrdinal);
+
+ final RexNode sumArgSquared =
+ rexBuilder.addAggCall(sumArgSquaredAggCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(sumArgSquaredAggCall.getType()));
+
+ final AggregateCall sumArgAggCall =
+ AggregateCall.create(
+ new HiveSqlSumAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ ImmutableIntList.of(argOrdinal),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel.getInput(),
+ null,
+ null);
+
+ final RexNode sumArg =
+ rexBuilder.addAggCall(sumArgAggCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(sumArgAggCall.getType()));
+ final RexNode sumArgCast = rexBuilder.ensureType(oldCallType, sumArg, true);
+ final RexNode sumSquaredArg =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.MULTIPLY, sumArgCast, sumArgCast);
+
+ final AggregateCall countArgAggCall =
+ AggregateCall.create(
+ new HiveSqlCountAggFunction(
+ oldCall.isDistinct(),
+ oldCall.getAggregation().getReturnTypeInference(),
+ oldCall.getAggregation().getOperandTypeInference(),
+ oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.COUNT,
+ oldCall.isDistinct(),
+ oldCall.isApproximate(),
+ oldCall.getArgList(),
+ oldCall.filterArg,
+ oldAggRel.getGroupCount(),
+ oldAggRel,
+ null,
+ null);
+
+ final RexNode countArg =
+ rexBuilder.addAggCall(countArgAggCall,
+ nGroups,
+ oldAggRel.indicator,
+ newCalls,
+ aggCallMapping,
+ ImmutableList.of(argOrdinalType));
+
+ final RexNode avgSumSquaredArg =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.DIVIDE, sumSquaredArg, countArg);
+
+ final RexNode diff =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.MINUS,
+ sumArgSquared, avgSumSquaredArg);
+
+ final RexNode denominator;
+ if (biased) {
+ denominator = countArg;
+ } else {
+ final RexLiteral one =
+ rexBuilder.makeExactLiteral(BigDecimal.ONE);
+ final RexNode nul =
+ rexBuilder.makeCast(countArg.getType(), rexBuilder.constantNull());
+ final RexNode countMinusOne =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.MINUS, countArg, one);
+ final RexNode countEqOne =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.EQUALS, countArg, one);
+ denominator =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.CASE,
+ countEqOne, nul, countMinusOne);
+ }
+
+ final RexNode div =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.DIVIDE, diff, denominator);
+
+ RexNode result = div;
+ if (sqrt) {
+ final RexNode half =
+ rexBuilder.makeExactLiteral(new BigDecimal("0.5"));
+ result =
+ rexBuilder.makeCall(
+ SqlStdOperatorTable.POWER, div, half);
+ }
+
+ return rexBuilder.makeCast(
+ oldCall.getType(), result);
+ }
+
+ /**
+ * Finds the ordinal of an element in a list, or adds it.
+ *
+ * @param list List
+ * @param element Element to lookup or add
+ * @param <T> Element type
+ * @return Ordinal of element in list
+ */
+ private static <T> int lookupOrAdd(List<T> list, T element) {
+ int ordinal = list.indexOf(element);
+ if (ordinal == -1) {
+ ordinal = list.size();
+ list.add(element);
+ }
+ return ordinal;
+ }
+
+ /**
+ * Do a shallow clone of oldAggRel and update aggCalls. Could be refactored
+ * into Aggregate and subclasses - but it's only needed for some
+ * subclasses.
+ *
+ * @param relBuilder Builder of relational expressions; at the top of its
+ * stack is its input
+ * @param oldAggregate LogicalAggregate to clone.
+ * @param newCalls New list of AggregateCalls
+ */
+ protected void newAggregateRel(RelBuilder relBuilder,
+ Aggregate oldAggregate, List<AggregateCall> newCalls) {
+ relBuilder.aggregate(
+ relBuilder.groupKey(oldAggregate.getGroupSet(),
+ oldAggregate.getGroupSets()),
+ newCalls);
+ }
+
+ private RelDataType getFieldType(RelNode relNode, int i) {
+ final RelDataTypeField inputField =
+ relNode.getRowType().getFieldList().get(i);
+ return inputField.getType();
+ }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index cb0c2b1b35..8a9e66a53d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -49,6 +49,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
+import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlVarianceAggFunction;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveConcat;
import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
@@ -608,7 +609,42 @@ public static SqlAggFunction getCalciteAggFn(String hiveUdfName, boolean isDisti
udfInfo.returnTypeInference,
udfInfo.operandTypeInference,
udfInfo.operandTypeChecker);
- break;
+ break;
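+ // Hive treats std and stddev as aliases of stddev_pop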
+ case "std":
+ case "stddev":
+ case "stddev_pop":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "stddev_pop",
+ SqlKind.STDDEV_POP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "stddev_samp":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "stddev_samp",
+ SqlKind.STDDEV_SAMP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
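+ // Hive treats variance as an alias of var_pop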
+ case "variance":
+ case "var_pop":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "var_pop",
+ SqlKind.VAR_POP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
+ case "var_samp":
+ calciteAggFn = new HiveSqlVarianceAggFunction(
+ "var_samp",
+ SqlKind.VAR_SAMP,
+ udfInfo.returnTypeInference,
+ udfInfo.operandTypeInference,
+ udfInfo.operandTypeChecker);
+ break;
default:
calciteAggFn = new CalciteUDAF(
isDistinct,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 3520d90fa8..d90dde992b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -177,6 +177,7 @@
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregatePullUpConstantsRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceFunctionsRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateReduceRule;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules;
import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule;
@@ -1821,6 +1822,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
rules.add(HiveReduceExpressionsRule.PROJECT_INSTANCE);
rules.add(HiveReduceExpressionsRule.FILTER_INSTANCE);
rules.add(HiveReduceExpressionsRule.JOIN_INSTANCE);
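+ // reduce AVG, STDDEV and VAR calls (and SUM, where needed) to simpler SUM0/COUNT-based forms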
+ rules.add(HiveAggregateReduceFunctionsRule.INSTANCE);
rules.add(HiveAggregateReduceRule.INSTANCE);
if (conf.getBoolVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZER)) {
rules.add(new HivePointLookupOptimizerRule.FilterCondition(minNumORClauses));
@@ -1839,7 +1841,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv
rules.toArray(new RelOptRule[rules.size()]));
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER,
"Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding");
-// it is happening at 1762
+
// 4. Push down limit through outer join
// NOTE: We run this after PPD to support old style join syntax.
// Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
index 01c933c4a1..8db24331d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java
@@ -41,16 +41,30 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
+ parameters[0].getTypeName() + " is passed.");
}
switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case LONG:
- return new SumZeroIfEmpty();
- default:
- throw new UDFArgumentTypeException(0,
- "Only bigint type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
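+ // widen each category to the matching sum evaluator: integral types sum as long,
+ // floating point and string types as double, decimal as HiveDecimal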
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new SumLongZeroIfEmpty();
+ case TIMESTAMP:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ case VARCHAR:
+ case CHAR:
+ return new SumDoubleZeroIfEmpty();
+ case DECIMAL:
+ return new SumHiveDecimalZeroIfEmpty();
+ case BOOLEAN:
+ case DATE:
+ default:
+ throw new UDFArgumentTypeException(0,
+ "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
}
}
- public static class SumZeroIfEmpty extends GenericUDAFSumLong {
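+ /** Long sum that returns the running total (initially 0) instead of null for an empty group. */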
+ public static class SumLongZeroIfEmpty extends GenericUDAFSumLong {
@Override
public Object terminate(AggregationBuffer agg) throws HiveException {
@@ -59,5 +73,25 @@ public Object terminate(AggregationBuffer agg) throws HiveException {
return result;
}
}
+
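+ /** Double variant: also returns the running total instead of null when no rows were aggregated. */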
+ public static class SumDoubleZeroIfEmpty extends GenericUDAFSumDouble {
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ result.set(myagg.sum);
+ return result;
+ }
+ }
+
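+ /** Decimal variant: also returns the running total instead of null when no rows were aggregated. */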
+ public static class SumHiveDecimalZeroIfEmpty extends GenericUDAFSumHiveDecimal {
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumHiveDecimalWritableAgg myagg = (SumHiveDecimalWritableAgg) agg;
+ result.set(myagg.sum);
+ return result;
+ }
+ }
}
diff --git a/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q
new file mode 100644
index 0000000000..ae4733f705
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udaf_binarysetfunctions_no_cbo.q
@@ -0,0 +1,60 @@
+set hive.cbo.enable=false;
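+-- same queries as udaf_binarysetfunctions.q, run with CBO disabled for comparison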
+
+drop table t;
+create table t (id int,px int,y decimal,x decimal);
+
+insert into t values (101,1,1,1);
+insert into t values (201,2,1,1);
+insert into t values (301,3,1,1);
+insert into t values (401,4,1,11);
+insert into t values (501,5,1,null);
+insert into t values (601,6,null,1);
+insert into t values (701,6,null,null);
+insert into t values (102,1,2,2);
+insert into t values (202,2,1,2);
+insert into t values (302,3,2,1);
+insert into t values (402,4,2,12);
+insert into t values (502,5,2,null);
+insert into t values (602,6,null,2);
+insert into t values (702,6,null,null);
+insert into t values (103,1,3,3);
+insert into t values (203,2,1,3);
+insert into t values (303,3,3,1);
+insert into t values (403,4,3,13);
+insert into t values (503,5,3,null);
+insert into t values (603,6,null,3);
+insert into t values (703,6,null,null);
+insert into t values (104,1,4,4);
+insert into t values (204,2,1,4);
+insert into t values (304,3,4,1);
+insert into t values (404,4,4,14);
+insert into t values (504,5,4,null);
+insert into t values (604,6,null,4);
+insert into t values (704,6,null,null);
+insert into t values (800,7,1,1);
+
+
+explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px;
+
+select px,
+ round( var_pop(x),5),
+ round( var_pop(y),5),
+ round( corr(y,x),5),
+ round( covar_samp(y,x),5),
+ round( covar_pop(y,x),5),
+ regr_count(y,x),
+ round( regr_slope(y,x),5),
+ round( regr_intercept(y,x),5),
+ round( regr_r2(y,x),5),
+ round( regr_sxx(y,x),5),
+ round( regr_syy(y,x),5),
+ round( regr_sxy(y,x),5),
+ round( regr_avgx(y,x),5),
+ round( regr_avgy(y,x),5),
+ round( regr_count(y,x),5)
+ from t group by px order by px;
+
+
+select id,regr_count(y,x) over (partition by px) from t order by id;
diff --git a/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out
index af058a44e4..72d1bdb5b1 100644
--- a/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out
+++ b/ql/src/test/results/clientpositive/udaf_binarysetfunctions.q.out
@@ -379,34 +379,38 @@ STAGE PLANS:
alias: t
Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
Select Operator
- expressions: px (type: int), y (type: decimal(10,0)), x (type: decimal(10,0))
- outputColumnNames: px, y, x
+ expressions: px (type: int), x (type: decimal(10,0)), y (type: decimal(10,0)), (UDFToDouble(x) * UDFToDouble(x)) (type: double), (UDFToDouble(y) * UDFToDouble(y)) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
Group By Operator
- aggregations: var_pop(x), var_pop(y), corr(y, x), covar_samp(y, x), covar_pop(y, x), regr_count(y, x), regr_slope(y, x), regr_intercept(y, x), regr_r2(y, x), regr_sxx(y, x), regr_syy(y, x), regr_sxy(y, x), regr_avgx(y, x), regr_avgy(y, x)
- keys: px (type: int)
+ aggregations: count(_col3), $sum0(_col3), count(_col1), $sum0(_col1), count(_col4), $sum0(_col4), count(_col2), $sum0(_col2), corr(_col2, _col1), covar_samp(_col2, _col1), covar_pop(_col2, _col1), regr_count(_col2, _col1), regr_slope(_col2, _col1), regr_intercept(_col2, _col1), regr_r2(_col2, _col1), regr_sxx(_col2, _col1), regr_syy(_col2, _col1), regr_sxy(_col2, _col1), regr_avgx(_col2, _col1), regr_avgy(_col2, _col1)
+ keys: _col0 (type: int)
mode: hash
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct)
+ value expressions: _col1 (type: bigint), _col2 (type: double), _col3 (type: bigint), _col4 (type: decimal(20,0)), _col5 (type: bigint), _col6 (type: double), _col7 (type: bigint), _col8 (type: decimal(20,0)), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: bigint), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct), _col20 (type: struct)
Reduce Operator Tree:
Group By Operator
- aggregations: var_pop(VALUE._col0), var_pop(VALUE._col1), corr(VALUE._col2), covar_samp(VALUE._col3), covar_pop(VALUE._col4), regr_count(VALUE._col5), regr_slope(VALUE._col6), regr_intercept(VALUE._col7), regr_r2(VALUE._col8), regr_sxx(VALUE._col9), regr_syy(VALUE._col10), regr_sxy(VALUE._col11), regr_avgx(VALUE._col12), regr_avgy(VALUE._col13)
+ aggregations: count(VALUE._col0), $sum0(VALUE._col1), count(VALUE._col2), $sum0(VALUE._col3), count(VALUE._col4), $sum0(VALUE._col5), count(VALUE._col6), $sum0(VALUE._col7), corr(VALUE._col8), covar_samp(VALUE._col9), covar_pop(VALUE._col10), regr_count(VALUE._col11), regr_slope(VALUE._col12), regr_intercept(VALUE._col13), regr_r2(VALUE._col14), regr_sxx(VALUE._col15), regr_syy(VALUE._col16), regr_sxy(VALUE._col17), regr_avgx(VALUE._col18), regr_avgy(VALUE._col19)
keys: KEY._col0 (type: int)
mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Select Operator
+ expressions: _col0 (type: int), ((CASE WHEN ((_col1 = 0L)) THEN (null) ELSE (_col2) END - ((CASE WHEN ((_col3 = 0L)) THEN (null) ELSE (_col4) END * CASE WHEN ((_col3 = 0L)) THEN (null) ELSE (_col4) END) / _col3)) / _col3) (type: double), ((CASE WHEN ((_col5 = 0L)) THEN (null) ELSE (_col6) END - ((CASE WHEN ((_col7 = 0L)) THEN (null) ELSE (_col8) END * CASE WHEN ((_col7 = 0L)) THEN (null) ELSE (_col8) END) / _col7)) / _col7) (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: decimal(14,4)), _col20 (type: decimal(14,4))
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-2
Map Reduce
diff --git a/ql/src/test/results/clientpositive/udaf_binarysetfunctions_no_cbo.q.out b/ql/src/test/results/clientpositive/udaf_binarysetfunctions_no_cbo.q.out
new file mode 100644
index 0000000000..6185693ade
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udaf_binarysetfunctions_no_cbo.q.out
@@ -0,0 +1,522 @@
+PREHOOK: query: drop table t
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table t
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table t (id int,px int,y decimal,x decimal)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (id int,px int,y decimal,x decimal)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values (101,1,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (101,1,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (201,2,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (201,2,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (301,3,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (301,3,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (401,4,1,11)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (401,4,1,11)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (501,5,1,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (501,5,1,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (601,6,null,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (601,6,null,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (701,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (701,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (102,1,2,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (102,1,2,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (202,2,1,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (202,2,1,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (302,3,2,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (302,3,2,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (402,4,2,12)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (402,4,2,12)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (502,5,2,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (502,5,2,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (602,6,null,2)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (602,6,null,2)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (702,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (702,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (103,1,3,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (103,1,3,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (203,2,1,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (203,2,1,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (303,3,3,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (303,3,3,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (403,4,3,13)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (403,4,3,13)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (503,5,3,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (503,5,3,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (603,6,null,3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (603,6,null,3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (703,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (703,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (104,1,4,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (104,1,4,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (204,2,1,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (204,2,1,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (304,3,4,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (304,3,4,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (404,4,4,14)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (404,4,4,14)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (504,5,4,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (504,5,4,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (604,6,null,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (604,6,null,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (704,6,null,null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (704,6,null,null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: insert into t values (800,7,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (800,7,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.id SCRIPT []
+POSTHOOK: Lineage: t.px SCRIPT []
+POSTHOOK: Lineage: t.x SCRIPT []
+POSTHOOK: Lineage: t.y SCRIPT []
+PREHOOK: query: explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select px,var_pop(x),var_pop(y),corr(y,x),covar_samp(y,x),covar_pop(y,x),regr_count(y,x),regr_slope(y,x),
+regr_intercept(y,x), regr_r2(y,x), regr_sxx(y,x), regr_syy(y,x), regr_sxy(y,x), regr_avgx(y,x), regr_avgy(y,x), regr_count(y,x)
+ from t group by px order by px
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: px (type: int), y (type: decimal(10,0)), x (type: decimal(10,0))
+ outputColumnNames: px, y, x
+ Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: var_pop(x), var_pop(y), corr(y, x), covar_samp(y, x), covar_pop(y, x), regr_count(y, x), regr_slope(y, x), regr_intercept(y, x), regr_r2(y, x), regr_sxx(y, x), regr_syy(y, x), regr_sxy(y, x), regr_avgx(y, x), regr_avgy(y, x)
+ keys: px (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 29 Data size: 281 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: bigint), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: var_pop(VALUE._col0), var_pop(VALUE._col1), corr(VALUE._col2), covar_samp(VALUE._col3), covar_pop(VALUE._col4), regr_count(VALUE._col5), regr_slope(VALUE._col6), regr_intercept(VALUE._col7), regr_r2(VALUE._col8), regr_sxx(VALUE._col9), regr_syy(VALUE._col10), regr_sxy(VALUE._col11), regr_avgx(VALUE._col12), regr_avgy(VALUE._col13)
+ keys: KEY._col0 (type: int)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
+ Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ Stage: Stage-2
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: bigint), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(14,4)), _col14 (type: decimal(14,4))
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: double), VALUE._col11 (type: double), VALUE._col12 (type: decimal(14,4)), VALUE._col13 (type: decimal(14,4)), VALUE._col5 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
+ Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 14 Data size: 135 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select px,
+ round( var_pop(x),5),
+ round( var_pop(y),5),
+ round( corr(y,x),5),
+ round( covar_samp(y,x),5),
+ round( covar_pop(y,x),5),
+ regr_count(y,x),
+ round( regr_slope(y,x),5),
+ round( regr_intercept(y,x),5),
+ round( regr_r2(y,x),5),
+ round( regr_sxx(y,x),5),
+ round( regr_syy(y,x),5),
+ round( regr_sxy(y,x),5),
+ round( regr_avgx(y,x),5),
+ round( regr_avgy(y,x),5),
+ round( regr_count(y,x),5)
+ from t group by px order by px
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select px,
+ round( var_pop(x),5),
+ round( var_pop(y),5),
+ round( corr(y,x),5),
+ round( covar_samp(y,x),5),
+ round( covar_pop(y,x),5),
+ regr_count(y,x),
+ round( regr_slope(y,x),5),
+ round( regr_intercept(y,x),5),
+ round( regr_r2(y,x),5),
+ round( regr_sxx(y,x),5),
+ round( regr_syy(y,x),5),
+ round( regr_sxy(y,x),5),
+ round( regr_avgx(y,x),5),
+ round( regr_avgy(y,x),5),
+ round( regr_count(y,x),5)
+ from t group by px order by px
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+1 1.25 1.25 1.0 1.66667 1.25 4 1.0 0.0 1.0 5.0 5.0 5.0 2.50000 2.50000 4
+2 1.25 0.0 NULL 0.0 0.0 4 0.0 1.0 1.0 5.0 0.0 0.0 2.50000 1.00000 4
+3 0.0 1.25 NULL 0.0 0.0 4 NULL NULL NULL 0.0 5.0 0.0 1.00000 2.50000 4
+4 1.25 1.25 1.0 1.66667 1.25 4 1.0 -10.0 1.0 5.0 5.0 5.0 12.50000 2.50000 4
+5 NULL 1.25 NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL 0
+6 1.25 NULL NULL NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL 0
+7 0.0 0.0 NULL NULL 0.0 1 NULL NULL NULL 0.0 0.0 0.0 1.00000 1.00000 1
+PREHOOK: query: select id,regr_count(y,x) over (partition by px) from t order by id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select id,regr_count(y,x) over (partition by px) from t order by id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+101 4
+102 4
+103 4
+104 4
+201 4
+202 4
+203 4
+204 4
+301 4
+302 4
+303 4
+304 4
+401 4
+402 4
+403 4
+404 4
+501 0
+502 0
+503 0
+504 0
+601 0
+602 0
+603 0
+604 0
+701 0
+702 0
+703 0
+704 0
+800 1