diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
index 9cb62c8..d603833 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveAggregate.java
@@ -37,10 +37,14 @@ import org.apache.calcite.util.IntList;
 
 import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
 
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Sets;
 
 public class HiveAggregate extends Aggregate implements HiveRelNode {
 
+  private ImmutableList<Integer> aggregateColumnsOrder;
+
+
   public HiveAggregate(RelOptCluster cluster, RelTraitSet traitSet,
       RelNode child, boolean indicator, ImmutableBitSet groupSet,
       List<ImmutableBitSet> groupSets, List<AggregateCall> aggCalls) {
@@ -126,4 +130,12 @@ public static RelDataType deriveRowType(RelDataTypeFactory typeFactory,
     return builder.build();
   }
 
+  public void setAggregateColumnsOrder(ImmutableList<Integer> aggregateColumnsOrder) {
+    this.aggregateColumnsOrder = aggregateColumnsOrder;
+  }
+
+  public List<Integer> getAggregateColumnsOrder() {
+    return this.aggregateColumnsOrder;
+  }
+
 }
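The annotation is purely advisory: it records positions within the Aggregate's group-by key list, in the order that satisfies the collation requested from above. A minimal, self-contained sketch of what a stored value means (the query and positions are hypothetical, for illustration only):

    import com.google.common.collect.ImmutableList;

    public class AggregateColumnsOrderSketch {
      public static void main(String[] args) {
        // Suppose the plan contains GROUP BY value, key and ORDER BY key, value.
        // The group-by keys are (value=0, key=1); emitting them as (key, value),
        // i.e. positions (1, 0), lets the sort piggyback on the group-by shuffle.
        ImmutableList<Integer> aggregateColumnsOrder = ImmutableList.of(1, 0);
        System.out.println("emit group keys in position order " + aggregateColumnsOrder);
      }
    }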
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelCollationPropagator.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelCollationPropagator.java
new file mode 100644
index 0000000..747dcfd
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelCollationPropagator.java
@@ -0,0 +1,248 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptUtil;
+import org.apache.calcite.rel.RelFieldCollation;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.Filter;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.SetOp;
+import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rex.RexCall;
+import org.apache.calcite.rex.RexInputRef;
+import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.tools.RelBuilder;
+import org.apache.calcite.util.ReflectUtil;
+import org.apache.calcite.util.ReflectiveVisitor;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+
+import com.google.common.collect.ImmutableList;
+
+
+/**
+ * This class infers an order for Aggregate columns and an order for the
+ * conjuncts in a Join condition that might be more beneficial, so that
+ * additional sort stages can be avoided. The only visible change is that the
+ * order of join conditions might change. Further, Aggregate operators might
+ * get annotated with the order in which Aggregate columns should be generated
+ * when we transform the operator tree into an AST or a Hive operator tree.
+ */
+public class HiveRelCollationPropagator implements ReflectiveVisitor {
+
+  private final ReflectUtil.MethodDispatcher<RelNode> propagateDispatcher;
+  private final RelBuilder relBuilder;
+
+
+  /**
+   * Creates a HiveRelCollationPropagator.
+   */
+  public HiveRelCollationPropagator(RelBuilder relBuilder) {
+    this.relBuilder = relBuilder;
+    this.propagateDispatcher =
+        ReflectUtil.createMethodDispatcher(
+            RelNode.class,
+            this,
+            "propagate",
+            RelNode.class,
+            List.class);
+  }
+
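The dispatcher resolves, for each visited node, the most specific propagate overload matching the node's runtime type (Aggregate, Join, Sort, ...), falling back to the RelNode catch-all at the end of the class. A minimal sketch of the same Calcite ReflectiveVisitor pattern with a toy hierarchy (class names are hypothetical, for illustration only):

    import org.apache.calcite.util.ReflectUtil;
    import org.apache.calcite.util.ReflectiveVisitor;

    public class DispatchSketch implements ReflectiveVisitor {
      // Toy hierarchy standing in for RelNode/Aggregate (illustration only).
      static class Node {}
      static class AggNode extends Node {}

      private final ReflectUtil.MethodDispatcher<String> dispatcher =
          ReflectUtil.createMethodDispatcher(String.class, this, "describe", Node.class);

      public String describe(Node n) { return "generic node"; }
      public String describe(AggNode n) { return "aggregate node"; }

      public static void main(String[] args) {
        DispatchSketch s = new DispatchSketch();
        // The runtime type selects the most specific overload.
        System.out.println(s.dispatcher.invoke(new AggNode())); // aggregate node
        System.out.println(s.dispatcher.invoke(new Node()));    // generic node
      }
    }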
+  /**
+   * Execute the logic in this class. In particular, make a top-down traversal
+   * of the tree and annotate and recreate the appropriate operators.
+   */
+  public RelNode propagate(RelNode root) {
+    final RelNode newRoot = dispatchPropagate(root, ImmutableList.of());
+    return newRoot;
+  }
+
+  protected final RelNode dispatchPropagate(RelNode node, List<RelFieldCollation> collations) {
+    return propagateDispatcher.invoke(node, collations);
+  }
+
+  public RelNode propagate(Aggregate rel, List<RelFieldCollation> collations) {
+    // 1) We extract the group by positions that are part of the collations and
+    // sort them so they respect it
+    ImmutableList.Builder<Integer> aggregateColumnsOrderBuilder = ImmutableList.builder();
+    ImmutableList.Builder<RelFieldCollation> propagateCollations = ImmutableList.builder();
+    if (!rel.indicator && !collations.isEmpty()) {
+      for (RelFieldCollation c : collations) {
+        if (c.getFieldIndex() < rel.getGroupCount()) {
+          // Group column found
+          aggregateColumnsOrderBuilder.add(c.getFieldIndex());
+          propagateCollations.add(c.copy(rel.getGroupSet().nth(c.getFieldIndex())));
+        }
+      }
+    }
+    ImmutableList<Integer> aggregateColumnsOrder = aggregateColumnsOrderBuilder.build();
+    for (int i = 0; i < rel.getGroupCount(); i++) {
+      if (!aggregateColumnsOrder.contains(i)) {
+        // Not included in the input collations, but can be propagated as this Aggregate
+        // will enforce it
+        propagateCollations.add(new RelFieldCollation(rel.getGroupSet().nth(i)));
+      }
+    }
+
+    // 2) We propagate
+    final RelNode child = dispatchPropagate(rel.getInput(), propagateCollations.build());
+
+    // 3) We annotate the Aggregate operator with this info
+    final HiveAggregate newAggregate = (HiveAggregate) rel.copy(rel.getTraitSet(),
+        ImmutableList.of(child));
+    newAggregate.setAggregateColumnsOrder(aggregateColumnsOrder);
+    return newAggregate;
+  }
+
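Note the index translation: output group column i of an Aggregate corresponds to input column groupSet.nth(i), which is why the collations are rewritten through getGroupSet().nth(...) before being pushed to the input. A small self-contained example (bit positions are hypothetical):

    import org.apache.calcite.util.ImmutableBitSet;

    public class GroupSetNthSketch {
      public static void main(String[] args) {
        // Hypothetical group set: the Aggregate groups on input columns 2 and 5.
        ImmutableBitSet groupSet = ImmutableBitSet.of(2, 5);
        // Output group column 0 maps back to input column 2, column 1 to 5;
        // this is the rewrite applied to each collation before propagation.
        System.out.println(groupSet.nth(0)); // 2
        System.out.println(groupSet.nth(1)); // 5
      }
    }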
+  public RelNode propagate(Join rel, List<RelFieldCollation> collations) {
+    ImmutableList.Builder<RelFieldCollation> propagateCollationsLeft = ImmutableList.builder();
+    ImmutableList.Builder<RelFieldCollation> propagateCollationsRight = ImmutableList.builder();
+    final int nLeftColumns = rel.getLeft().getRowType().getFieldList().size();
+    Map<Integer, RexNode> idxToConjuncts = new HashMap<>();
+    Map<Integer, Integer> refToRef = new HashMap<>();
+    // 1) We extract the conditions that can be useful
+    List<RexNode> conjuncts = new ArrayList<>();
+    List<RexNode> otherConjuncts = new ArrayList<>();
+    for (RexNode conj : RelOptUtil.conjunctions(rel.getCondition())) {
+      if (conj.getKind() != SqlKind.EQUALS) {
+        otherConjuncts.add(conj);
+        continue;
+      }
+      // It needs to be an EQUAL operator on two references
+      RexCall equals = (RexCall) conj;
+      if (!(equals.getOperands().get(0) instanceof RexInputRef) ||
+          !(equals.getOperands().get(1) instanceof RexInputRef)) {
+        otherConjuncts.add(conj);
+        continue;
+      }
+      RexInputRef ref0 = (RexInputRef) equals.getOperands().get(0);
+      RexInputRef ref1 = (RexInputRef) equals.getOperands().get(1);
+      if ((ref0.getIndex() < nLeftColumns && ref1.getIndex() >= nLeftColumns) ||
+          (ref1.getIndex() < nLeftColumns && ref0.getIndex() >= nLeftColumns)) {
+        // We made sure the references are for different join inputs
+        idxToConjuncts.put(ref0.getIndex(), equals);
+        idxToConjuncts.put(ref1.getIndex(), equals);
+        refToRef.put(ref0.getIndex(), ref1.getIndex());
+        refToRef.put(ref1.getIndex(), ref0.getIndex());
+      } else {
+        otherConjuncts.add(conj);
+      }
+    }
+
+    // 2) We extract the collation for this operator and the collations
+    // that we will propagate to the inputs of the join
+    for (RelFieldCollation c : collations) {
+      RexNode equals = idxToConjuncts.get(c.getFieldIndex());
+      if (equals != null) {
+        conjuncts.add(equals);
+        idxToConjuncts.remove(c.getFieldIndex());
+        idxToConjuncts.remove(refToRef.get(c.getFieldIndex()));
+        if (c.getFieldIndex() < nLeftColumns) {
+          propagateCollationsLeft.add(c.copy(c.getFieldIndex()));
+          propagateCollationsRight.add(c.copy(refToRef.get(c.getFieldIndex()) - nLeftColumns));
+        } else {
+          propagateCollationsLeft.add(c.copy(refToRef.get(c.getFieldIndex())));
+          propagateCollationsRight.add(c.copy(c.getFieldIndex() - nLeftColumns));
+        }
+      }
+    }
+    final Set<RexNode> visited = new HashSet<>();
+    for (Entry<Integer, RexNode> e : idxToConjuncts.entrySet()) {
+      if (visited.add(e.getValue())) {
+        // Not included in the input collations, but can be propagated as this Join
+        // might enforce it
+        conjuncts.add(e.getValue());
+        if (e.getKey() < nLeftColumns) {
+          propagateCollationsLeft.add(new RelFieldCollation(e.getKey()));
+          propagateCollationsRight.add(new RelFieldCollation(refToRef.get(e.getKey()) - nLeftColumns));
+        } else {
+          propagateCollationsLeft.add(new RelFieldCollation(refToRef.get(e.getKey())));
+          propagateCollationsRight.add(new RelFieldCollation(e.getKey() - nLeftColumns));
+        }
+      }
+    }
+    conjuncts.addAll(otherConjuncts);
+
+    // 3) We propagate
+    final RelNode newLeftInput = dispatchPropagate(rel.getLeft(), propagateCollationsLeft.build());
+    final RelNode newRightInput = dispatchPropagate(rel.getRight(), propagateCollationsRight.build());
+
+    // 4) We change the Join operator to reflect this info
+    final RelNode newJoin = rel.copy(rel.getTraitSet(), RexUtil.composeConjunction(
+        relBuilder.getRexBuilder(), conjuncts, false), newLeftInput, newRightInput,
+        rel.getJoinType(), rel.isSemiJoinDone());
+    return newJoin;
+  }
+
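The join rewrite hinges on splitting the condition into conjuncts, pulling useful equality conjuncts to the front in collation order, and recomposing the condition. A self-contained sketch of that split/reorder/compose round trip using the same Calcite helpers (the condition and the reordering policy here are hypothetical):

    import java.util.Collections;
    import java.util.List;

    import org.apache.calcite.plan.RelOptUtil;
    import org.apache.calcite.rel.type.RelDataType;
    import org.apache.calcite.rel.type.RelDataTypeSystem;
    import org.apache.calcite.rex.RexBuilder;
    import org.apache.calcite.rex.RexNode;
    import org.apache.calcite.rex.RexUtil;
    import org.apache.calcite.sql.fun.SqlStdOperatorTable;
    import org.apache.calcite.sql.type.SqlTypeFactoryImpl;
    import org.apache.calcite.sql.type.SqlTypeName;

    public class ConjunctReorderSketch {
      public static void main(String[] args) {
        RexBuilder rexBuilder = new RexBuilder(new SqlTypeFactoryImpl(RelDataTypeSystem.DEFAULT));
        RelDataType intType = rexBuilder.getTypeFactory().createSqlType(SqlTypeName.INTEGER);
        // Hypothetical condition: $0 = $2 AND $1 = $3 over a two-column left input.
        RexNode eq02 = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
            rexBuilder.makeInputRef(intType, 0), rexBuilder.makeInputRef(intType, 2));
        RexNode eq13 = rexBuilder.makeCall(SqlStdOperatorTable.EQUALS,
            rexBuilder.makeInputRef(intType, 1), rexBuilder.makeInputRef(intType, 3));
        RexNode cond = rexBuilder.makeCall(SqlStdOperatorTable.AND, eq02, eq13);
        // Split, reorder (here simply reversed, as if the collation asked for
        // $1 first), and recompose. These are the same split/compose calls the
        // Join rule above uses.
        List<RexNode> conjuncts = RelOptUtil.conjunctions(cond);
        Collections.reverse(conjuncts);
        System.out.println(RexUtil.composeConjunction(rexBuilder, conjuncts, false));
      }
    }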
+  public RelNode propagate(SetOp rel, List<RelFieldCollation> collations) {
+    ImmutableList.Builder<RelNode> newInputs = new ImmutableList.Builder<>();
+    for (RelNode input : rel.getInputs()) {
+      newInputs.add(dispatchPropagate(input, collations));
+    }
+    return rel.copy(rel.getTraitSet(), newInputs.build());
+  }
+
+  public RelNode propagate(Project rel, List<RelFieldCollation> collations) {
+    // 1) We extract the collations indices
+    ImmutableList.Builder<RelFieldCollation> propagateCollations = ImmutableList.builder();
+    for (RelFieldCollation c : collations) {
+      RexNode rexNode = rel.getChildExps().get(c.getFieldIndex());
+      if (rexNode instanceof RexInputRef) {
+        int newIdx = ((RexInputRef) rexNode).getIndex();
+        propagateCollations.add(c.copy(newIdx));
+      }
+    }
+    // 2) We propagate
+    final RelNode child = dispatchPropagate(rel.getInput(), propagateCollations.build());
+    // 3) Return new Project
+    return rel.copy(rel.getTraitSet(), ImmutableList.of(child));
+  }
+
+  public RelNode propagate(Filter rel, List<RelFieldCollation> collations) {
+    final RelNode child = dispatchPropagate(rel.getInput(), collations);
+    return rel.copy(rel.getTraitSet(), ImmutableList.of(child));
+  }
+
+  public RelNode propagate(Sort rel, List<RelFieldCollation> collations) {
+    final RelNode child = dispatchPropagate(rel.getInput(), rel.collation.getFieldCollations());
+    return rel.copy(rel.getTraitSet(), ImmutableList.of(child));
+  }
+
+  // Catch-all rule when none of the others apply.
+  public RelNode propagate(RelNode rel, List<RelFieldCollation> collations) {
+    ImmutableList.Builder<RelNode> newInputs = new ImmutableList.Builder<>();
+    for (RelNode input : rel.getInputs()) {
+      newInputs.add(dispatchPropagate(input, ImmutableList.of()));
+    }
+    return rel.copy(rel.getTraitSet(), newInputs.build());
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
index 353d8db..3c39aea 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java
@@ -56,6 +56,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveGroupingID;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
@@ -142,11 +143,19 @@ else if (aggregateType == Group.CUBE) {
         b = ASTBuilder.construct(HiveParser.TOK_GROUPBY, "TOK_GROUPBY");
       }
 
-      for (int i : groupBy.getGroupSet()) {
-        RexInputRef iRef = new RexInputRef(i, groupBy.getCluster().getTypeFactory()
-            .createSqlType(SqlTypeName.ANY));
+      HiveAggregate hiveAgg = (HiveAggregate) groupBy;
+      for (int pos : hiveAgg.getAggregateColumnsOrder()) {
+        RexInputRef iRef = new RexInputRef(groupBy.getGroupSet().nth(pos),
+            groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
         b.add(iRef.accept(new RexVisitor(schema)));
       }
+      for (int pos = 0; pos < groupBy.getGroupCount(); pos++) {
+        if (!hiveAgg.getAggregateColumnsOrder().contains(pos)) {
+          RexInputRef iRef = new RexInputRef(groupBy.getGroupSet().nth(pos),
+              groupBy.getCluster().getTypeFactory().createSqlType(SqlTypeName.ANY));
+          b.add(iRef.accept(new RexVisitor(schema)));
+        }
+      }
 
       //Grouping sets expressions
       if(groupingSetsExpression) {
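The two loops above emit the annotated group-by positions first, then the remaining positions in their original order. A standalone sketch of the resulting emission order (the annotation value is hypothetical):

    import java.util.ArrayList;
    import java.util.List;

    import com.google.common.collect.ImmutableList;

    public class GroupByEmitOrderSketch {
      public static void main(String[] args) {
        // Hypothetical annotation: three group-by keys, collation wants key 1 first.
        ImmutableList<Integer> aggregateColumnsOrder = ImmutableList.of(1);
        int groupCount = 3;
        // Same shape as the two loops above: annotated positions first,
        // then the remaining positions in their original order.
        List<Integer> emitOrder = new ArrayList<>(aggregateColumnsOrder);
        for (int pos = 0; pos < groupCount; pos++) {
          if (!aggregateColumnsOrder.contains(pos)) {
            emitOrder.add(pos);
          }
        }
        System.out.println(emitOrder); // [1, 0, 2]
      }
    }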
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
index 1a543fb..6fc8eae 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java
@@ -40,16 +40,18 @@ import org.apache.calcite.rex.RexOver;
 import org.apache.calcite.sql.SqlAggFunction;
 import org.apache.calcite.util.Pair;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelCollationPropagator;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.ImmutableList;
 
@@ -65,6 +67,13 @@ public static RelNode convertOpTree(RelNode rel, List<FieldSchema> resultSchema)
       LOG.debug("Original plan for PlanModifier\n " + RelOptUtil.toString(newTopNode));
     }
 
+    HiveRelCollationPropagator propagator = new HiveRelCollationPropagator(
+        HiveRelFactories.HIVE_BUILDER.create(newTopNode.getCluster(), null));
+    newTopNode = propagator.propagate(newTopNode);
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Plan after propagating order\n " + RelOptUtil.toString(newTopNode));
+    }
+
     if (!(newTopNode instanceof Project) && !(newTopNode instanceof Sort)) {
       newTopNode = introduceDerivedTable(newTopNode);
       if (LOG.isDebugEnabled()) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
index 77771c3..d53efbf 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/correlation/ReduceSinkDeDuplication.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.PTFOperator;
 import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
@@ -205,7 +206,7 @@ protected boolean merge(ReduceSinkOperator cRS, JoinOperator pJoin, int minReduc
       return false;
     }
 
-    Integer moveRSOrderTo = checkOrder(cRSc.getOrder(), pRSNc.getOrder(),
+    Integer moveRSOrderTo = checkOrder(true, cRSc.getOrder(), pRSNc.getOrder(),
         cRSc.getNullOrder(), pRSNc.getNullOrder());
     if (moveRSOrderTo == null) {
       return false;
     }
@@ -304,6 +305,16 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
       }
       pRS.getConf().setOrder(cRS.getConf().getOrder());
       pRS.getConf().setNullOrder(cRS.getConf().getNullOrder());
+    } else {
+      // The sorting order of the parent RS is more specific or they are equal.
+      // We copy the order from the child RS and then fill in the order of the
+      // remaining columns with the one taken from the parent RS.
+      StringBuilder order = new StringBuilder(cRS.getConf().getOrder());
+      StringBuilder orderNull = new StringBuilder(cRS.getConf().getNullOrder());
+      order.append(pRS.getConf().getOrder().substring(order.length()));
+      orderNull.append(pRS.getConf().getNullOrder().substring(orderNull.length()));
+      pRS.getConf().setOrder(order.toString());
+      pRS.getConf().setNullOrder(orderNull.toString());
     }
 
     if (result[3] > 0) {
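The prefix merge above operates on Hive's order strings, one character per key column: '+'/'-' for ascending/descending in the order string, and 'a'/'z' for nulls first/last in the null-order string. A worked example with hypothetical order strings:

    public class OrderMergeSketch {
      public static void main(String[] args) {
        // Child RS sorts a two-key prefix, descending on the second key; the
        // parent RS has three keys, all ascending.
        String childOrder = "+-";
        String parentOrder = "+++";
        // Same prefix merge as above: the child's order wins on the overlap,
        // and the parent's order fills in the tail.
        StringBuilder order = new StringBuilder(childOrder);
        order.append(parentOrder.substring(order.length()));
        System.out.println(order); // "+-+"
      }
    }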
@@ -342,7 +353,9 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
       throws SemanticException {
     ReduceSinkDesc cConf = cRS.getConf();
     ReduceSinkDesc pConf = pRS.getConf();
-    Integer moveRSOrderTo = checkOrder(cConf.getOrder(), pConf.getOrder(),
+    // If there is a PTF between cRS and pRS, we cannot ignore the order direction
+    final boolean checkStrictEquality = isStrictEqualityNeeded(cRS, pRS);
+    Integer moveRSOrderTo = checkOrder(checkStrictEquality, cConf.getOrder(), pConf.getOrder(),
         cConf.getNullOrder(), pConf.getNullOrder());
     if (moveRSOrderTo == null) {
       return null;
     }
@@ -370,6 +383,18 @@ protected boolean merge(ReduceSinkOperator cRS, ReduceSinkOperator pRS, int minR
         moveReducerNumTo, moveNumDistKeyTo};
   }
 
+  private boolean isStrictEqualityNeeded(ReduceSinkOperator cRS, ReduceSinkOperator pRS) {
+    Operator<?> parent = cRS.getParentOperators().get(0);
+    while (parent != pRS) {
+      assert parent.getNumParent() == 1;
+      if (parent instanceof PTFOperator) {
+        return true;
+      }
+      parent = parent.getParentOperators().get(0);
+    }
+    return false;
+  }
+
   private Integer checkNumDistributionKey(int cnd, int pnd) {
     // number of distribution keys of cRS is chosen only when numDistKeys of pRS
     // is 0 or less. In all other cases, distribution of the keys is based on
@@ -452,8 +477,7 @@ protected Integer sameKeys(List<ExprNodeDesc> cexprs, List<ExprNodeDesc> pexprs,
     return Integer.valueOf(cexprs.size()).compareTo(pexprs.size());
   }
 
-  // order of overlapping keys should be exactly the same
-  protected Integer checkOrder(String corder, String porder,
+  protected Integer checkOrder(boolean checkStrictEquality, String corder, String porder,
       String cNullOrder, String pNullOrder) {
     assert corder.length() == cNullOrder.length();
     assert porder.length() == pNullOrder.length();
@@ -468,12 +492,15 @@ protected Integer checkOrder(String corder, String porder,
     }
     corder = corder.trim();
     porder = porder.trim();
-    cNullOrder = cNullOrder.trim();
-    pNullOrder = pNullOrder.trim();
-    int target = Math.min(corder.length(), porder.length());
-    if (!corder.substring(0, target).equals(porder.substring(0, target)) ||
-        !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) {
-      return null;
+    if (checkStrictEquality) {
+      // order of overlapping keys should be exactly the same
+      cNullOrder = cNullOrder.trim();
+      pNullOrder = pNullOrder.trim();
+      int target = Math.min(corder.length(), porder.length());
+      if (!corder.substring(0, target).equals(porder.substring(0, target)) ||
+          !cNullOrder.substring(0, target).equals(pNullOrder.substring(0, target))) {
+        return null;
+      }
     }
     return Integer.valueOf(corder.length()).compareTo(porder.length());
   }
diff --git ql/src/test/queries/clientpositive/limit_pushdown2.q ql/src/test/queries/clientpositive/limit_pushdown2.q
new file mode 100644
index 0000000..637b5b0
--- /dev/null
+++ ql/src/test/queries/clientpositive/limit_pushdown2.q
@@ -0,0 +1,78 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key, value desc limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by key, value
+order by key desc, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc, value limit 20;
+
+explain
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key
+order by key desc limit 20;
+
+-- NOT APPLICABLE
+explain
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20;
+
+select value, avg(key + 1) myavg from src
+group by value
+order by myavg, value desc limit 20;
+
+-- NOT APPLICABLE
+explain
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20;
+
+select key, value, avg(key + 1) from src
+group by value, key with rollup
+order by key, value limit 20;
diff --git ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q
new file mode 100644
index 0000000..cd67f4c
--- /dev/null
+++ ql/src/test/queries/clientpositive/reduce_deduplicate_extended2.q
@@ -0,0 +1,92 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.auto.convert.join=false;
+set hive.auto.convert.join.noconditionaltask=false;
+set hive.convert.join.bucket.mapjoin.tez=false;
+set hive.optimize.dynamic.partition.hashjoin=false;
+set hive.limit.pushdown.memory.usage=0.3f;
+set hive.optimize.reducededuplication.min.reducer=1;
+
+-- JOIN + GBY
+EXPLAIN
+SELECT f.key, g.value
+FROM src f
+JOIN src g ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.value, f.key;
+
+-- JOIN + GBY + OBY
+EXPLAIN
+SELECT g.key, f.value
+FROM src f
+JOIN src g ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.key, f.value
+ORDER BY f.value, g.key;
+
+-- GBY + JOIN + GBY
+EXPLAIN
+SELECT f.key, g.value
+FROM src f
+JOIN (
+  SELECT key, value
+  FROM src
+  GROUP BY key, value) g
+ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.value, f.key;
+
+-- 2GBY + JOIN + GBY
+EXPLAIN
+SELECT f.key, g.value
+FROM (
+  SELECT key, value
+  FROM src
+  GROUP BY value, key) f
+JOIN (
+  SELECT key, value
+  FROM src
+  GROUP BY key, value) g
+ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.value, f.key;
+
+-- 2GBY + JOIN + GBY + OBY
+EXPLAIN
+SELECT f.key, g.value
+FROM (
+  SELECT value
+  FROM src
+  GROUP BY value) g
+JOIN (
+  SELECT key
+  FROM src
+  GROUP BY key) f
+GROUP BY g.value, f.key
+ORDER BY f.key desc, g.value;
+
+-- 2(2GBY + JOIN + GBY + OBY) + UNION
+EXPLAIN
+SELECT x.key, x.value
+FROM (
+  SELECT f.key, g.value
+  FROM (
+    SELECT key, value
+    FROM src
+    GROUP BY key, value) f
+  JOIN (
+    SELECT key, value
+    FROM src
+    GROUP BY value, key) g
+  ON (f.key = g.key AND f.value = g.value)
+  GROUP BY g.value,
f.key +UNION ALL + SELECT f.key, g.value + FROM ( + SELECT key, value + FROM src + GROUP BY value, key) f + JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g + ON (f.key = g.key AND f.value = g.value) + GROUP BY f.key, g.value +) x +ORDER BY x.value desc, x.key desc; diff --git ql/src/test/results/clientpositive/bucket_groupby.q.out ql/src/test/results/clientpositive/bucket_groupby.q.out index 867fad4..4d66881 100644 --- ql/src/test/results/clientpositive/bucket_groupby.q.out +++ ql/src/test/results/clientpositive/bucket_groupby.q.out @@ -1547,12 +1547,12 @@ STAGE PLANS: alias: clustergroupby Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string), key (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: string), value (type: string) + outputColumnNames: _col1, _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string), _col1 (type: string) + keys: _col1 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1570,7 +1570,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string), _col2 (type: bigint) + expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/limit_pushdown2.q.out ql/src/test/results/clientpositive/limit_pushdown2.q.out new file mode 100644 index 0000000..2f68674 --- /dev/null +++ ql/src/test/results/clientpositive/limit_pushdown2.q.out @@ -0,0 +1,804 @@ +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: 
COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: +- + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src 
+group by key, value +order by key, value desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key, value desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by key, value +order by key desc, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 
val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 1.0 +10 val_10 11.0 +100 val_100 101.0 +103 val_103 104.0 +104 val_104 105.0 +105 val_105 106.0 +11 val_11 12.0 +111 val_111 112.0 +113 val_113 114.0 +114 val_114 115.0 +116 val_116 117.0 +118 val_118 119.0 +119 val_119 120.0 +12 val_12 13.0 +120 val_120 121.0 +125 val_125 126.0 +126 val_126 127.0 +128 val_128 129.0 +129 val_129 130.0 +131 val_131 132.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key +order by key desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 99.0 +97 val_97 98.0 +96 val_96 97.0 +95 val_95 96.0 +92 val_92 93.0 +90 val_90 91.0 +9 val_9 10.0 +87 val_87 88.0 +86 val_86 87.0 +85 val_85 86.0 +84 val_84 85.0 +83 val_83 84.0 +82 val_82 83.0 +80 val_80 81.0 +8 val_8 9.0 +78 val_78 79.0 +77 val_77 78.0 +76 val_76 77.0 +74 val_74 75.0 +72 val_72 73.0 +PREHOOK: query: -- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT APPLICABLE +explain +select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + key expressions: _col1 (type: double), _col0 (type: string) + sort order: +- + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, avg(key + 1) myavg from src +group by value +order by myavg, value desc limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 1.0 +val_2 3.0 +val_4 5.0 +val_5 6.0 +val_8 9.0 +val_9 10.0 +val_10 11.0 +val_11 12.0 +val_12 13.0 +val_15 16.0 +val_17 18.0 +val_18 19.0 +val_19 20.0 +val_20 21.0 +val_24 25.0 +val_26 27.0 +val_27 28.0 +val_28 29.0 +val_30 31.0 +val_33 34.0 +PREHOOK: query: -- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +PREHOOK: type: QUERY +POSTHOOK: query: -- NOT APPLICABLE +explain +select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string), (UDFToDouble(key) + 1.0) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(_col2) + keys: _col0 (type: string), _col1 (type: string), '0' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + pruneGroupingSetId: true + Select Operator + expressions: _col1 (type: string), _col0 (type: string), _col3 (type: 
double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 0.3 + value expressions: _col2 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 20 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 20 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value, avg(key + 1) from src +group by value, key with rollup +order by key, value limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +NULL NULL 261.182 +NULL val_0 1.0 +NULL val_10 11.0 +NULL val_100 101.0 +NULL val_103 104.0 +NULL val_104 105.0 +NULL val_105 106.0 +NULL val_11 12.0 +NULL val_111 112.0 +NULL val_113 114.0 +NULL val_114 115.0 +NULL val_116 117.0 +NULL val_118 119.0 +NULL val_119 120.0 +NULL val_12 13.0 +NULL val_120 121.0 +NULL val_125 126.0 +NULL val_126 127.0 +NULL val_128 129.0 +NULL val_129 130.0 diff --git ql/src/test/results/clientpositive/lineage3.q.out ql/src/test/results/clientpositive/lineage3.q.out index 12ae13e..a769022 100644 --- ql/src/test/results/clientpositive/lineage3.q.out +++ ql/src/test/results/clientpositive/lineage3.q.out @@ -317,7 +317,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@dest_v3 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (tok_nulls_first (. 
(tok_table_or_col $hdt$_0) csmallint))))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
+{"version":"1.0","engine":"mr","database":"default","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (tok_nulls_first (. (tok_table_or_col $hdt$_0) csmallint))))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[3],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8,7],"targets":[0,1,2],"expression":"((a.cboolean2 = true) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9,7],"targets":[0,1,2],"expression":"((a.cfloat > 0.0) and a.cint is not null)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]}
 38 216 false
 38 229 true
 PREHOOK: query: drop table if exists src_dp
diff --git ql/src/test/results/clientpositive/perf/query45.q.out ql/src/test/results/clientpositive/perf/query45.q.out
index 04f9b02..c11cd2d 100644
--- ql/src/test/results/clientpositive/perf/query45.q.out
+++ ql/src/test/results/clientpositive/perf/query45.q.out
@@ -25,15 +25,15 @@ Stage-0
                 Output:["_col0","_col1","_col2"]
               <-Reducer 4 [SIMPLE_EDGE]
                 SHUFFLE [RS_42]
-                  Select Operator [SEL_41] (rows=95833781 width=135)
-                    Output:["_col0","_col1","_col2"]
-                    Group By Operator [GBY_40] (rows=95833781 width=135)
-                      Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
-                    <-Reducer 3 [SIMPLE_EDGE]
-                      SHUFFLE [RS_39]
-                        PartitionCols:_col0, _col1
-                        Group By Operator [GBY_38] (rows=191667562 width=135)
-                          Output:["_col0","_col1","_col2"],aggregations:["sum(_col10)"],keys:_col3, _col4
+                  Group By Operator [GBY_40] (rows=95833781 width=135)
+                    Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
+                  <-Reducer 3 [SIMPLE_EDGE]
+                    SHUFFLE [RS_39]
+                      PartitionCols:_col0, _col1
+                      Group By Operator [GBY_38] (rows=191667562 width=135)
+                        Output:["_col0","_col1","_col2"],aggregations:["sum(_col10)"],keys:_col4, _col3
+                        Select Operator [SEL_37] (rows=191667562 width=135)
+                          Output:["_col4","_col3","_col10"]
                           Merge Join Operator [MERGEJOIN_72] (rows=191667562 width=135)
                             Conds:RS_34._col0=RS_35._col4(Inner),Output:["_col3","_col4","_col10"]
                           <-Reducer 2 [SIMPLE_EDGE]
diff --git ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out
new file mode 100644
index 0000000..da487ef
--- /dev/null
+++ ql/src/test/results/clientpositive/reduce_deduplicate_extended2.q.out
@@ -0,0 +1,1079 @@
+PREHOOK: query: -- JOIN + GBY
+EXPLAIN
+SELECT f.key, g.value
+FROM src f
+JOIN src g ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.value, f.key
+PREHOOK: type: QUERY
+POSTHOOK: query: -- JOIN + GBY
+EXPLAIN
+SELECT f.key, g.value
+FROM src f
+JOIN src g ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.value, f.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: f
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and value is not null) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: f
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (key is not null and value is not null) (type: boolean)
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: key (type: string), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: string), _col1 (type: string)
+            1 _col0 (type: string), _col1 (type: string)
+          outputColumnNames: _col0, _col3
+          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+          Group By Operator
+            keys: _col0 (type: string), _col3 (type: string)
+            mode: hash
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+      Reduce Operator Tree:
+        Group By Operator
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- JOIN + GBY + OBY
+EXPLAIN
+SELECT g.key, f.value
+FROM src f
+JOIN src g ON (f.key = g.key AND f.value = g.value)
+GROUP BY g.key, f.value +ORDER BY f.value, g.key +PREHOOK: type: QUERY +POSTHOOK: query: -- JOIN + GBY + OBY +EXPLAIN +SELECT g.key, f.value +FROM src f +JOIN src g ON (f.key = g.key AND f.value = g.value) +GROUP BY g.key, f.value +ORDER BY f.value, g.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- GBY + JOIN + GBY +EXPLAIN +SELECT f.key, g.value +FROM src f +JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g +ON (f.key = g.key AND f.value = g.value) +GROUP BY g.value, f.key +PREHOOK: type: QUERY +POSTHOOK: query: -- GBY + JOIN + GBY +EXPLAIN +SELECT f.key, g.value +FROM src f +JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g +ON (f.key = g.key AND f.value = g.value) +GROUP BY g.value, f.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col3 + 
Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 2GBY + JOIN + GBY +EXPLAIN +SELECT f.key, g.value +FROM ( + SELECT key, value + FROM src + GROUP BY value, key) f +JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g +ON (f.key = g.key AND f.value = g.value) +GROUP BY g.value, f.key +PREHOOK: type: QUERY +POSTHOOK: query: -- 2GBY + JOIN + GBY +EXPLAIN +SELECT f.key, g.value +FROM ( + SELECT key, value + FROM src + GROUP BY value, key) f +JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g +ON (f.key = g.key AND f.value = g.value) +GROUP BY g.value, f.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: -- 2GBY + JOIN + GBY + OBY +EXPLAIN +SELECT f.key, g.value +FROM ( + SELECT value + FROM src + GROUP BY value) g +JOIN ( + SELECT key + FROM src + GROUP BY key) f +GROUP BY g.value, f.key +ORDER BY f.key desc, g.value +PREHOOK: type: QUERY +POSTHOOK: query: -- 2GBY + JOIN + GBY + OBY +EXPLAIN +SELECT f.key, g.value +FROM ( + SELECT value + FROM src + GROUP BY value) g +JOIN ( + SELECT key + FROM src + GROUP BY key) f +GROUP BY g.value, f.key +ORDER BY f.key desc, g.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: -+ + Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string) + Statistics: Num rows: 62500 Data size: 1390500 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 31250 Data size: 695250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- 2(2GBY + JOIN + GBY + OBY) + UNION +EXPLAIN +SELECT x.key, x.value +FROM ( + SELECT f.key, g.value + FROM ( + SELECT key, value + FROM src + GROUP BY key, value) f + JOIN ( + SELECT key, value + FROM src + GROUP BY value, key) g + ON (f.key = g.key AND f.value = g.value) + GROUP BY g.value, f.key +UNION ALL + SELECT f.key, g.value + FROM ( + SELECT key, value + FROM src + GROUP BY value, key) f + JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g + ON (f.key = g.key AND f.value = g.value) + GROUP BY f.key, g.value +) x +ORDER BY x.value desc, x.key desc +PREHOOK: type: QUERY +POSTHOOK: query: -- 2(2GBY + JOIN + GBY + OBY) + UNION +EXPLAIN +SELECT x.key, x.value +FROM ( + SELECT f.key, g.value + FROM ( + SELECT key, value + FROM src + GROUP BY key, value) f + JOIN ( + SELECT key, value + FROM src + GROUP BY value, key) g + ON (f.key = g.key AND f.value = g.value) + GROUP BY g.value, f.key +UNION ALL + SELECT f.key, g.value + FROM ( + SELECT key, value + FROM src + GROUP BY value, key) f + JOIN ( + SELECT key, value + FROM src + GROUP BY key, value) g + ON (f.key = g.key AND f.value = g.value) + GROUP BY f.key, g.value +) x +ORDER BY x.value desc, x.key desc +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3, Stage-8 + Stage-5 is a root stage + Stage-6 is a root stage + Stage-7 depends on stages: Stage-6, Stage-9 + Stage-8 depends on stages: Stage-7 + Stage-9 is a root stage + Stage-0 depends on stages: Stage-4 + 
+STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col1, _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic 
stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Union + Statistics: Num rows: 274 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: -- + Statistics: Num rows: 274 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + TableScan + Union + Statistics: Num rows: 274 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: -- + Statistics: Num rows: 274 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 274 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 274 Data size: 2910 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col1, _col0 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col1 (type: string), _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index cb3d9a4..bd6e5ab 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -94,14 +94,14 @@ STAGE PLANS: Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1) - keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 606 Data size: 18603 
Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ - Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double) + Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized @@ -109,12 +109,12 @@ STAGE PLANS: Reduce Operator Tree: Group By Operator aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5) - keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double) + keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double) + expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index 1871c7e..be4b1a0 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out 
@@ -651,70 +651,68 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_35] - Select Operator [SEL_34] (rows=1 width=20) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_33] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0, _col1 - Group By Operator [GBY_31] (rows=1 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col2, _col6 - Select Operator [SEL_30] (rows=1 width=20) - Output:["_col2","_col6"] - Filter Operator [FIL_29] (rows=1 width=20) - predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) - Merge Join Operator [MERGEJOIN_42] (rows=4 width=20) - Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_27] - PartitionCols:_col0 - Select Operator [SEL_24] (rows=20 width=80) - Output:["_col0","_col1"] - Filter Operator [FIL_41] (rows=20 width=80) - predicate:(c_int > 0) - TableScan [TS_22] (rows=20 width=80) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_25] - PartitionCols:_col0 - Select Operator [SEL_9] (rows=1 width=97) - Output:["_col0","_col1","_col2"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_8] - Select Operator [SEL_6] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_5] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_4] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_39] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) - TableScan [TS_0] (rows=20 width=83) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] - <-Reducer 9 [SIMPLE_EDGE] - SHUFFLE [RS_26] - PartitionCols:_col0 - Select Operator [SEL_20] (rows=1 width=89) - Output:["_col0","_col1"] - <-Reducer 8 [SIMPLE_EDGE] - SHUFFLE [RS_19] - Select Operator [SEL_17] (rows=1 width=105) - Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_16] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=101) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_40] (rows=1 width=93) - predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) - TableScan [TS_11] (rows=20 width=83) - default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_33] (rows=1 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_32] + PartitionCols:_col0, _col1 + Group By Operator [GBY_31] (rows=1 width=20) + 
Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col6, _col2 + Select Operator [SEL_30] (rows=1 width=20) + Output:["_col6","_col2"] + Filter Operator [FIL_29] (rows=1 width=20) + predicate:(((_col1 + _col4) >= 0) and ((_col1 > 0) or (_col6 >= 0)) and ((_col6 >= 1) or (_col2 >= 1)) and ((UDFToLong(_col6) + _col2) >= 0)) + Merge Join Operator [MERGEJOIN_42] (rows=4 width=20) + Conds:RS_25._col0=RS_26._col0(Outer),RS_25._col0=RS_27._col0(Right Outer),Output:["_col1","_col2","_col4","_col6"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_27] + PartitionCols:_col0 + Select Operator [SEL_24] (rows=20 width=80) + Output:["_col0","_col1"] + Filter Operator [FIL_41] (rows=20 width=80) + predicate:(c_int > 0) + TableScan [TS_22] (rows=20 width=80) + default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_25] + PartitionCols:_col0 + Select Operator [SEL_9] (rows=1 width=97) + Output:["_col0","_col1","_col2"] + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_8] + Select Operator [SEL_6] (rows=1 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_5] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_4] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_3] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_39] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) + TableScan [TS_0] (rows=20 width=83) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + <-Reducer 9 [SIMPLE_EDGE] + SHUFFLE [RS_26] + PartitionCols:_col0 + Select Operator [SEL_20] (rows=1 width=89) + Output:["_col0","_col1"] + <-Reducer 8 [SIMPLE_EDGE] + SHUFFLE [RS_19] + Select Operator [SEL_17] (rows=1 width=105) + Output:["_col0","_col1","_col2","_col3"] + Group By Operator [GBY_16] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 7 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_14] (rows=1 width=101) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Filter Operator [FIL_40] (rows=1 width=93) + predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0)) + TableScan [TS_11] (rows=20 width=83) + default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select cbo_t3.c_int, c, count(*) from (select key as a, c_int+1 as b, sum(c_int) as c from cbo_t1 where (cbo_t1.c_int + 1 >= 0) and (cbo_t1.c_int > 0 or cbo_t1.c_float >= 0) group by c_float, cbo_t1.c_int, key having cbo_t1.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t1 join (select key as p, c_int+1 as q, sum(c_int) as r from cbo_t2 where (cbo_t2.c_int + 1 >= 0) and (cbo_t2.c_int > 0 or cbo_t2.c_float >= 0) group by c_float, cbo_t2.c_int, key having cbo_t2.c_float > 0 and (c_int >=1 or c_float >= 1) and (c_int + c_float) >= 0) cbo_t2 on cbo_t1.a=p join cbo_t3 on cbo_t1.a=key where (b + cbo_t2.q >= 0) and (b > 0 or c_int >= 0) group by cbo_t3.c_int, c PREHOOK: 
type: QUERY @@ -1348,28 +1346,26 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_12] - Select Operator [SEL_11] (rows=5 width=20) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_10] (rows=5 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_9] - PartitionCols:_col0, _col1 - Group By Operator [GBY_8] (rows=5 width=20) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Select Operator [SEL_5] (rows=10 width=91) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=10 width=91) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 - <-Map 1 [SIMPLE_EDGE] - SHUFFLE [RS_3] - PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_2] (rows=10 width=91) - Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Select Operator [SEL_1] (rows=20 width=83) - Output:["key","c_int","c_float"] - TableScan [TS_0] (rows=20 width=83) - default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] + Group By Operator [GBY_10] (rows=5 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 + <-Reducer 2 [SIMPLE_EDGE] + SHUFFLE [RS_9] + PartitionCols:_col0, _col1 + Group By Operator [GBY_8] (rows=5 width=20) + Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0 + Select Operator [SEL_5] (rows=10 width=91) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=10 width=91) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 + <-Map 1 [SIMPLE_EDGE] + SHUFFLE [RS_3] + PartitionCols:_col0, _col1, _col2 + Group By Operator [GBY_2] (rows=10 width=91) + Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float + Select Operator [SEL_1] (rows=20 width=83) + Output:["key","c_int","c_float"] + TableScan [TS_0] (rows=20 width=83) + default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] PREHOOK: query: explain select key from(select key from (select key from cbo_t1 limit 5)cbo_t2 limit 5)cbo_t3 limit 5 PREHOOK: type: QUERY @@ -1676,72 +1672,74 @@ Stage-0 Output:["_col0","_col1","_col2"] <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_39] - Group By Operator [GBY_37] (rows=1 width=101) - Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=1 width=101) - Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col0, _col1 - Merge Join Operator [MERGEJOIN_51] (rows=1 width=93) - Conds:RS_30._col0=RS_31._col0(Left Semi),RS_30._col0=RS_32._col0(Left Semi),Output:["_col0","_col1"] - <-Map 10 [SIMPLE_EDGE] - SHUFFLE [RS_32] - PartitionCols:_col0 - Group By Operator [GBY_29] (rows=3 width=56) - Output:["_col0"],keys:_col0 - Select Operator [SEL_25] (rows=6 width=70) - Output:["_col0"] - Filter Operator [FIL_50] (rows=6 width=70) - predicate:(UDFToDouble(key) > 0.0) - TableScan [TS_23] (rows=20 width=76) - default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_30] - PartitionCols:_col0 - Select Operator [SEL_10] (rows=1 width=93) - Output:["_col0","_col1"] - <-Reducer 2 [SIMPLE_EDGE] - SHUFFLE [RS_9] - Select Operator [SEL_8] (rows=1 
width=101)
-                  Output:["_col0","_col1","_col2"]
-                  Filter Operator [FIL_7] (rows=1 width=101)
-                    predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0))
-                    Select Operator [SEL_6] (rows=1 width=101)
-                      Output:["_col1","_col2","_col3"]
-                      Group By Operator [GBY_5] (rows=1 width=101)
+              Select Operator [SEL_38] (rows=1 width=101)
+                Output:["_col0","_col1","_col2"]
+                Group By Operator [GBY_37] (rows=1 width=101)
+                  Output:["_col0","_col1","_col2"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1
+                <-Reducer 4 [SIMPLE_EDGE]
+                  SHUFFLE [RS_36]
+                    PartitionCols:_col0, _col1
+                    Group By Operator [GBY_35] (rows=1 width=101)
+                      Output:["_col0","_col1","_col2"],aggregations:["count()"],keys:_col1, _col0
+                      Merge Join Operator [MERGEJOIN_51] (rows=1 width=93)
+                        Conds:RS_30._col0=RS_31._col0(Left Semi),RS_30._col0=RS_32._col0(Left Semi),Output:["_col0","_col1"]
+                      <-Map 10 [SIMPLE_EDGE]
+                        SHUFFLE [RS_32]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_29] (rows=3 width=56)
+                            Output:["_col0"],keys:_col0
+                            Select Operator [SEL_25] (rows=6 width=70)
+                              Output:["_col0"]
+                              Filter Operator [FIL_50] (rows=6 width=70)
+                                predicate:(UDFToDouble(key) > 0.0)
+                                TableScan [TS_23] (rows=20 width=76)
+                                  default@cbo_t3,cbo_t3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                      <-Reducer 3 [SIMPLE_EDGE]
+                        SHUFFLE [RS_30]
+                          PartitionCols:_col0
+                          Select Operator [SEL_10] (rows=1 width=93)
+                            Output:["_col0","_col1"]
+                          <-Reducer 2 [SIMPLE_EDGE]
+                            SHUFFLE [RS_9]
+                              Select Operator [SEL_8] (rows=1 width=101)
+                                Output:["_col0","_col1","_col2"]
+                                Filter Operator [FIL_7] (rows=1 width=101)
+                                  predicate:(((UDFToDouble(_col2) >= 1.0) or (_col3 >= 1)) and ((UDFToDouble(_col2) + UDFToDouble(_col3)) >= 0.0))
+                                  Select Operator [SEL_6] (rows=1 width=101)
+                                    Output:["_col1","_col2","_col3"]
+                                    Group By Operator [GBY_5] (rows=1 width=101)
+                                      Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
+                                    <-Map 1 [SIMPLE_EDGE]
+                                      SHUFFLE [RS_4]
+                                        PartitionCols:_col0, _col1, _col2
+                                        Group By Operator [GBY_3] (rows=1 width=101)
+                                          Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
+                                          Filter Operator [FIL_48] (rows=1 width=93)
+                                            predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0))
+                                            TableScan [TS_0] (rows=20 width=83)
+                                              default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
+                      <-Reducer 9 [SIMPLE_EDGE]
+                        SHUFFLE [RS_31]
+                          PartitionCols:_col0
+                          Group By Operator [GBY_27] (rows=1 width=85)
+                            Output:["_col0"],keys:_col0
+                            Select Operator [SEL_21] (rows=1 width=85)
+                              Output:["_col0"]
+                            <-Reducer 8 [SIMPLE_EDGE]
+                              SHUFFLE [RS_20]
+                                Select Operator [SEL_18] (rows=1 width=93)
+                                  Output:["_col0","_col1"]
+                                  Group By Operator [GBY_17] (rows=1 width=101)
                                     Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
-                    <-Map 1 [SIMPLE_EDGE]
-                      SHUFFLE [RS_4]
+                                  <-Map 7 [SIMPLE_EDGE]
+                                    SHUFFLE [RS_16]
                                       PartitionCols:_col0, _col1, _col2
-                      Group By Operator [GBY_3] (rows=1 width=101)
+                                      Group By Operator [GBY_15] (rows=1 width=101)
                                         Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
-                        Filter Operator [FIL_48] (rows=1 width=93)
-                          predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (((c_int + 1) + 1) >= 0) and (((c_int + 1) > 0) or (UDFToDouble(key) >= 0.0)) and (UDFToDouble(key) > 0.0))
-                          TableScan [TS_0] (rows=20 width=83)
-                            default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
-                    <-Reducer 9 [SIMPLE_EDGE]
-                      SHUFFLE [RS_31]
-                        PartitionCols:_col0
-                        Group By Operator [GBY_27] (rows=1 width=85)
-                          Output:["_col0"],keys:_col0
-                          Select Operator [SEL_21] (rows=1 width=85)
-                            Output:["_col0"]
-                          <-Reducer 8 [SIMPLE_EDGE]
-                            SHUFFLE [RS_20]
-                              Select Operator [SEL_18] (rows=1 width=93)
-                                Output:["_col0","_col1"]
-                                Group By Operator [GBY_17] (rows=1 width=101)
-                                  Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2
-                                <-Map 7 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_16]
-                                    PartitionCols:_col0, _col1, _col2
-                                    Group By Operator [GBY_15] (rows=1 width=101)
-                                      Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float
-                                      Filter Operator [FIL_49] (rows=1 width=93)
-                                        predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (UDFToDouble(key) > 0.0))
-                                      TableScan [TS_12] (rows=20 width=83)
-                                        default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
+                                          Filter Operator [FIL_49] (rows=1 width=93)
+                                            predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0)) and (c_float > 0.0) and ((c_int >= 1) or (c_float >= 1.0)) and ((UDFToFloat(c_int) + c_float) >= 0.0) and (UDFToDouble(key) > 0.0))
+                                            TableScan [TS_12] (rows=20 width=83)
+                                              default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]
 PREHOOK: query: explain select cbo_t1.key as x, c_int as c_int, (((c_int+c_float)*10)+5) as y from cbo_t1
 PREHOOK: type: QUERY
diff --git ql/src/test/results/clientpositive/tez/explainuser_2.q.out ql/src/test/results/clientpositive/tez/explainuser_2.q.out
index 5530660..370bd37 100644
--- ql/src/test/results/clientpositive/tez/explainuser_2.q.out
+++ ql/src/test/results/clientpositive/tez/explainuser_2.q.out
@@ -308,123 +308,121 @@ Stage-0
             Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
            <-Reducer 4 [SIMPLE_EDGE]
              SHUFFLE [RS_52]
-                Select Operator [SEL_51] (rows=805 width=10)
-                  Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                  Group By Operator [GBY_50] (rows=805 width=10)
-                    Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2
-                  <-Reducer 3 [SIMPLE_EDGE]
-                    SHUFFLE [RS_49]
-                      PartitionCols:_col0, _col1, _col2
-                      Group By Operator [GBY_48] (rows=1610 width=10)
-                        Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col2, _col12, _col20
-                        Select Operator [SEL_47] (rows=1610 width=10)
-                          Output:["_col2","_col12","_col20","_col13","_col21","_col3"]
-                          Merge Join Operator [MERGEJOIN_97] (rows=1610 width=10)
-                            Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"]
-                          <-Reducer 11 [SIMPLE_EDGE]
-                            SHUFFLE [RS_45]
-                              PartitionCols:_col15, _col17
-                              Select Operator [SEL_40] (rows=1464 width=10)
-                                Output:["_col14","_col15","_col17","_col6","_col7"]
-                                Merge Join Operator [MERGEJOIN_96] (rows=1464 width=10)
-                                  Conds:RS_37._col4, _col6=RS_38._col2, _col4(Inner),Output:["_col2","_col3","_col14","_col15","_col17"]
-                                <-Reducer 10 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_37]
-                                    PartitionCols:_col4, _col6
-                                    Merge Join Operator [MERGEJOIN_94] (rows=1331 width=10)
-                                      Conds:RS_34._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"]
-                                    <-Map 14 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_35]
-                                        PartitionCols:_col1
-                                        Select Operator [SEL_17] (rows=12 width=7)
-                                          Output:["_col1"]
-                                          Filter Operator [FIL_88] (rows=12 width=7)
-                                            predicate:((key = 'src1key') and value is not null)
-                                            TableScan [TS_15] (rows=25 width=7)
-                                              default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                                    <-Reducer 9 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_34]
-                                        PartitionCols:_col3
-                                        Merge Join Operator [MERGEJOIN_93] (rows=1210 width=10)
-                                          Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"]
-                                        <-Map 13 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_32]
-                                            PartitionCols:_col0
-                                            Select Operator [SEL_14] (rows=250 width=10)
-                                              Output:["_col0"]
-                                              Filter Operator [FIL_87] (rows=250 width=10)
-                                                predicate:((value = 'd1value') and key is not null)
-                                                TableScan [TS_12] (rows=500 width=10)
-                                                  default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                                        <-Reducer 8 [SIMPLE_EDGE]
-                                          SHUFFLE [RS_31]
-                                            PartitionCols:_col2
-                                            Merge Join Operator [MERGEJOIN_92] (rows=1100 width=10)
-                                              Conds:RS_28._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"]
-                                            <-Map 12 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_29]
-                                                PartitionCols:_col3
-                                                Select Operator [SEL_11] (rows=42 width=34)
-                                                  Output:["_col0","_col1","_col2","_col3","_col4"]
-                                                  Filter Operator [FIL_86] (rows=42 width=34)
-                                                    predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null)
-                                                    TableScan [TS_9] (rows=85 width=34)
-                                                      default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
-                                            <-Map 7 [SIMPLE_EDGE]
-                                              SHUFFLE [RS_28]
-                                                PartitionCols:_col1
-                                                Select Operator [SEL_8] (rows=1000 width=10)
-                                                  Output:["_col1"]
-                                                  Filter Operator [FIL_85] (rows=1000 width=10)
-                                                    predicate:((key = 'srcpartkey') and value is not null)
-                                                    TableScan [TS_6] (rows=2000 width=10)
-                                                      default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                                <-Reducer 16 [SIMPLE_EDGE]
-                                  SHUFFLE [RS_38]
-                                    PartitionCols:_col2, _col4
-                                    Merge Join Operator [MERGEJOIN_95] (rows=275 width=10)
-                                      Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col2","_col3","_col4","_col5"]
-                                    <-Map 15 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_24]
-                                        PartitionCols:_col0
-                                        Select Operator [SEL_20] (rows=42 width=34)
-                                          Output:["_col0","_col2","_col3","_col4","_col5"]
-                                          Filter Operator [FIL_89] (rows=42 width=34)
-                                            predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null)
-                                            TableScan [TS_18] (rows=85 width=34)
-                                              default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
-                                    <-Map 17 [SIMPLE_EDGE]
-                                      SHUFFLE [RS_25]
-                                        PartitionCols:_col0
-                                        Select Operator [SEL_23] (rows=250 width=10)
-                                          Output:["_col0"]
-                                          Filter Operator [FIL_90] (rows=250 width=10)
-                                            predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
-                                            TableScan [TS_21] (rows=500 width=10)
-                                              default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                          <-Reducer 2 [SIMPLE_EDGE]
-                            SHUFFLE [RS_44]
-                              PartitionCols:_col1, _col3
-                              Merge Join Operator [MERGEJOIN_91] (rows=275 width=10)
-                                Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"]
-                              <-Map 1 [SIMPLE_EDGE]
-                                SHUFFLE [RS_41]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_2] (rows=170 width=34)
-                                    Output:["_col0","_col1","_col2","_col3"]
-                                    Filter Operator [FIL_83] (rows=170 width=34)
-                                      predicate:(v2 is not null and v3 is not null and k1 is not null)
-                                      TableScan [TS_0] (rows=170 width=34)
-                                        default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
-                              <-Map 6 [SIMPLE_EDGE]
-                                SHUFFLE [RS_42]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_5] (rows=250 width=10)
-                                    Output:["_col0"]
-                                    Filter Operator [FIL_84] (rows=250 width=10)
-                                      predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
-                                      TableScan [TS_3] (rows=500 width=10)
-                                        default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+              Group By Operator [GBY_50] (rows=805 width=10)
+                Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2
+              <-Reducer 3 [SIMPLE_EDGE]
+                SHUFFLE [RS_49]
+                  PartitionCols:_col0, _col1, _col2
+                  Group By Operator [GBY_48] (rows=1610 width=10)
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col12, _col20, _col2
+                    Select Operator [SEL_47] (rows=1610 width=10)
+                      Output:["_col12","_col20","_col2","_col13","_col21","_col3"]
+                      Merge Join Operator [MERGEJOIN_97] (rows=1610 width=10)
+                        Conds:RS_44._col1, _col3=RS_45._col15, _col17(Inner),Output:["_col2","_col3","_col12","_col13","_col20","_col21"]
+                      <-Reducer 11 [SIMPLE_EDGE]
+                        SHUFFLE [RS_45]
+                          PartitionCols:_col15, _col17
+                          Select Operator [SEL_40] (rows=1464 width=10)
+                            Output:["_col14","_col15","_col17","_col6","_col7"]
+                            Merge Join Operator [MERGEJOIN_96] (rows=1464 width=10)
+                              Conds:RS_37._col4, _col6=RS_38._col2, _col4(Inner),Output:["_col2","_col3","_col14","_col15","_col17"]
+                            <-Reducer 10 [SIMPLE_EDGE]
+                              SHUFFLE [RS_37]
+                                PartitionCols:_col4, _col6
+                                Merge Join Operator [MERGEJOIN_94] (rows=1331 width=10)
+                                  Conds:RS_34._col3=RS_35._col1(Inner),Output:["_col2","_col3","_col4","_col6"]
+                                <-Map 14 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_35]
+                                    PartitionCols:_col1
+                                    Select Operator [SEL_17] (rows=12 width=7)
+                                      Output:["_col1"]
+                                      Filter Operator [FIL_88] (rows=12 width=7)
+                                        predicate:((key = 'src1key') and value is not null)
+                                        TableScan [TS_15] (rows=25 width=7)
+                                          default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                                <-Reducer 9 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_34]
+                                    PartitionCols:_col3
+                                    Merge Join Operator [MERGEJOIN_93] (rows=1210 width=10)
+                                      Conds:RS_31._col2=RS_32._col0(Inner),Output:["_col2","_col3","_col4","_col6"]
+                                    <-Map 13 [SIMPLE_EDGE]
+                                      SHUFFLE [RS_32]
+                                        PartitionCols:_col0
+                                        Select Operator [SEL_14] (rows=250 width=10)
+                                          Output:["_col0"]
+                                          Filter Operator [FIL_87] (rows=250 width=10)
+                                            predicate:((value = 'd1value') and key is not null)
+                                            TableScan [TS_12] (rows=500 width=10)
+                                              default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                                    <-Reducer 8 [SIMPLE_EDGE]
+                                      SHUFFLE [RS_31]
+                                        PartitionCols:_col2
+                                        Merge Join Operator [MERGEJOIN_92] (rows=1100 width=10)
+                                          Conds:RS_28._col1=RS_29._col3(Inner),Output:["_col2","_col3","_col4","_col6"]
+                                        <-Map 12 [SIMPLE_EDGE]
+                                          SHUFFLE [RS_29]
+                                            PartitionCols:_col3
+                                            Select Operator [SEL_11] (rows=42 width=34)
+                                              Output:["_col0","_col1","_col2","_col3","_col4"]
+                                              Filter Operator [FIL_86] (rows=42 width=34)
+                                                predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null)
+                                                TableScan [TS_9] (rows=85 width=34)
+                                                  default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
+                                        <-Map 7 [SIMPLE_EDGE]
+                                          SHUFFLE [RS_28]
+                                            PartitionCols:_col1
+                                            Select Operator [SEL_8] (rows=1000 width=10)
+                                              Output:["_col1"]
+                                              Filter Operator [FIL_85] (rows=1000 width=10)
+                                                predicate:((key = 'srcpartkey') and value is not null)
+                                                TableScan [TS_6] (rows=2000 width=10)
+                                                  default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                            <-Reducer 16 [SIMPLE_EDGE]
+                              SHUFFLE [RS_38]
+                                PartitionCols:_col2, _col4
+                                Merge Join Operator [MERGEJOIN_95] (rows=275 width=10)
+                                  Conds:RS_24._col0=RS_25._col0(Inner),Output:["_col2","_col3","_col4","_col5"]
+                                <-Map 15 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_24]
+                                    PartitionCols:_col0
+                                    Select Operator [SEL_20] (rows=42 width=34)
+                                      Output:["_col0","_col2","_col3","_col4","_col5"]
+                                      Filter Operator [FIL_89] (rows=42 width=34)
+                                        predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null)
+                                        TableScan [TS_18] (rows=85 width=34)
+                                          default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
+                                <-Map 17 [SIMPLE_EDGE]
+                                  SHUFFLE [RS_25]
+                                    PartitionCols:_col0
+                                    Select Operator [SEL_23] (rows=250 width=10)
+                                      Output:["_col0"]
+                                      Filter Operator [FIL_90] (rows=250 width=10)
+                                        predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
+                                        TableScan [TS_21] (rows=500 width=10)
+                                          default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                      <-Reducer 2 [SIMPLE_EDGE]
+                        SHUFFLE [RS_44]
+                          PartitionCols:_col1, _col3
+                          Merge Join Operator [MERGEJOIN_91] (rows=275 width=10)
+                            Conds:RS_41._col0=RS_42._col0(Inner),Output:["_col1","_col2","_col3"]
+                          <-Map 1 [SIMPLE_EDGE]
+                            SHUFFLE [RS_41]
+                              PartitionCols:_col0
+                              Select Operator [SEL_2] (rows=170 width=34)
+                                Output:["_col0","_col1","_col2","_col3"]
+                                Filter Operator [FIL_83] (rows=170 width=34)
+                                  predicate:(v2 is not null and v3 is not null and k1 is not null)
+                                  TableScan [TS_0] (rows=170 width=34)
+                                    default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
+                          <-Map 6 [SIMPLE_EDGE]
+                            SHUFFLE [RS_42]
+                              PartitionCols:_col0
+                              Select Operator [SEL_5] (rows=250 width=10)
+                                Output:["_col0"]
+                                Filter Operator [FIL_84] (rows=250 width=10)
+                                  predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
+                                  TableScan [TS_3] (rows=500 width=10)
+                                    default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
 PREHOOK: query: explain SELECT x.key, z.value, y.value
@@ -1036,102 +1034,100 @@ Stage-0
             Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
            <-Reducer 4 [SIMPLE_EDGE]
              SHUFFLE [RS_52]
-                Select Operator [SEL_51] (rows=805 width=10)
-                  Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                  Group By Operator [GBY_50] (rows=805 width=10)
-                    Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2
-                  <-Map 3 [SIMPLE_EDGE]
-                    SHUFFLE [RS_49]
-                      PartitionCols:_col0, _col1, _col2
-                      Group By Operator [GBY_48] (rows=1610 width=10)
-                        Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col2, _col12, _col20
-                        Select Operator [SEL_47] (rows=1610 width=10)
-                          Output:["_col2","_col12","_col20","_col13","_col21","_col3"]
-                          Map Join Operator [MAPJOIN_97] (rows=1610 width=10)
-                            Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"]
-                          <-Map 2 [BROADCAST_EDGE]
-                            BROADCAST [RS_44]
-                              PartitionCols:_col1, _col3
-                              Map Join Operator [MAPJOIN_91] (rows=275 width=10)
-                                Conds:RS_41._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"]
-                              <-Map 1 [BROADCAST_EDGE]
-                                BROADCAST [RS_41]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_2] (rows=170 width=34)
-                                    Output:["_col0","_col1","_col2","_col3"]
-                                    Filter Operator [FIL_83] (rows=170 width=34)
-                                      predicate:(v2 is not null and v3 is not null and k1 is not null)
-                                      TableScan [TS_0] (rows=170 width=34)
-                                        default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
-                              <-Select Operator [SEL_5] (rows=250 width=10)
-                                  Output:["_col0"]
-                                  Filter Operator [FIL_84] (rows=250 width=10)
-                                    predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
-                                    TableScan [TS_3] (rows=500 width=10)
-                                      default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                          <-Select Operator [SEL_40] (rows=1464 width=10)
-                              Output:["_col14","_col15","_col17","_col6","_col7"]
-                              Map Join Operator [MAPJOIN_96] (rows=1464 width=10)
-                                Conds:MAPJOIN_94._col4, _col6=RS_38._col2, _col4(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col14","_col15","_col17"]
-                              <-Map 10 [BROADCAST_EDGE]
-                                BROADCAST [RS_38]
-                                  PartitionCols:_col2, _col4
-                                  Map Join Operator [MAPJOIN_95] (rows=275 width=10)
-                                    Conds:RS_24._col0=SEL_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col5"]
-                                  <-Map 9 [BROADCAST_EDGE]
-                                    BROADCAST [RS_24]
-                                      PartitionCols:_col0
-                                      Select Operator [SEL_20] (rows=42 width=34)
-                                        Output:["_col0","_col2","_col3","_col4","_col5"]
-                                        Filter Operator [FIL_89] (rows=42 width=34)
-                                          predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null)
-                                          TableScan [TS_18] (rows=85 width=34)
-                                            default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
-                                  <-Select Operator [SEL_23] (rows=250 width=10)
+              Group By Operator [GBY_50] (rows=805 width=10)
+                Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"],keys:KEY._col0, KEY._col1, KEY._col2
+              <-Map 3 [SIMPLE_EDGE]
+                SHUFFLE [RS_49]
+                  PartitionCols:_col0, _col1, _col2
+                  Group By Operator [GBY_48] (rows=1610 width=10)
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["count(_col13)","count(_col21)","count(_col3)"],keys:_col12, _col20, _col2
+                    Select Operator [SEL_47] (rows=1610 width=10)
+                      Output:["_col12","_col20","_col2","_col13","_col21","_col3"]
+                      Map Join Operator [MAPJOIN_97] (rows=1610 width=10)
+                        Conds:RS_44._col1, _col3=SEL_40._col15, _col17(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col12","_col13","_col20","_col21"]
+                      <-Map 2 [BROADCAST_EDGE]
+                        BROADCAST [RS_44]
+                          PartitionCols:_col1, _col3
+                          Map Join Operator [MAPJOIN_91] (rows=275 width=10)
+                            Conds:RS_41._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col1","_col2","_col3"]
+                          <-Map 1 [BROADCAST_EDGE]
+                            BROADCAST [RS_41]
+                              PartitionCols:_col0
+                              Select Operator [SEL_2] (rows=170 width=34)
+                                Output:["_col0","_col1","_col2","_col3"]
+                                Filter Operator [FIL_83] (rows=170 width=34)
+                                  predicate:(v2 is not null and v3 is not null and k1 is not null)
+                                  TableScan [TS_0] (rows=170 width=34)
+                                    default@cs,cs,Tbl:COMPLETE,Col:NONE,Output:["k1","v2","k3","v3"]
+                          <-Select Operator [SEL_5] (rows=250 width=10)
+                              Output:["_col0"]
+                              Filter Operator [FIL_84] (rows=250 width=10)
+                                predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
+                                TableScan [TS_3] (rows=500 width=10)
+                                  default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                      <-Select Operator [SEL_40] (rows=1464 width=10)
+                          Output:["_col14","_col15","_col17","_col6","_col7"]
+                          Map Join Operator [MAPJOIN_96] (rows=1464 width=10)
+                            Conds:MAPJOIN_94._col4, _col6=RS_38._col2, _col4(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col14","_col15","_col17"]
+                          <-Map 10 [BROADCAST_EDGE]
+                            BROADCAST [RS_38]
+                              PartitionCols:_col2, _col4
+                              Map Join Operator [MAPJOIN_95] (rows=275 width=10)
+                                Conds:RS_24._col0=SEL_23._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col5"]
+                              <-Map 9 [BROADCAST_EDGE]
+                                BROADCAST [RS_24]
+                                  PartitionCols:_col0
+                                  Select Operator [SEL_20] (rows=42 width=34)
+                                    Output:["_col0","_col2","_col3","_col4","_col5"]
+                                    Filter Operator [FIL_89] (rows=42 width=34)
+                                      predicate:((v1 = 'srv1') and k2 is not null and k3 is not null and v2 is not null and v3 is not null and k1 is not null)
+                                      TableScan [TS_18] (rows=85 width=34)
+                                        default@sr,sr,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
+                              <-Select Operator [SEL_23] (rows=250 width=10)
+                                  Output:["_col0"]
+                                  Filter Operator [FIL_90] (rows=250 width=10)
+                                    predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
+                                    TableScan [TS_21] (rows=500 width=10)
+                                      default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                          <-Map Join Operator [MAPJOIN_94] (rows=1331 width=10)
+                              Conds:MAPJOIN_93._col3=RS_35._col1(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"]
+                            <-Map 8 [BROADCAST_EDGE]
+                              BROADCAST [RS_35]
+                                PartitionCols:_col1
+                                Select Operator [SEL_17] (rows=12 width=7)
+                                  Output:["_col1"]
+                                  Filter Operator [FIL_88] (rows=12 width=7)
+                                    predicate:((key = 'src1key') and value is not null)
+                                    TableScan [TS_15] (rows=25 width=7)
+                                      default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                            <-Map Join Operator [MAPJOIN_93] (rows=1210 width=10)
+                                Conds:MAPJOIN_92._col2=RS_32._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"]
+                              <-Map 7 [BROADCAST_EDGE]
+                                BROADCAST [RS_32]
+                                  PartitionCols:_col0
+                                  Select Operator [SEL_14] (rows=250 width=10)
                                     Output:["_col0"]
-                                  Filter Operator [FIL_90] (rows=250 width=10)
-                                    predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null)
-                                    TableScan [TS_21] (rows=500 width=10)
+                                    Filter Operator [FIL_87] (rows=250 width=10)
+                                      predicate:((value = 'd1value') and key is not null)
+                                      TableScan [TS_12] (rows=500 width=10)
                                       default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                          <-Map Join Operator [MAPJOIN_94] (rows=1331 width=10)
-                              Conds:MAPJOIN_93._col3=RS_35._col1(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"]
-                            <-Map 8 [BROADCAST_EDGE]
-                              BROADCAST [RS_35]
-                                PartitionCols:_col1
-                                Select Operator [SEL_17] (rows=12 width=7)
+                              <-Map Join Operator [MAPJOIN_92] (rows=1100 width=10)
+                                  Conds:SEL_8._col1=RS_29._col3(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"]
+                                <-Map 6 [BROADCAST_EDGE]
+                                  BROADCAST [RS_29]
+                                    PartitionCols:_col3
+                                    Select Operator [SEL_11] (rows=42 width=34)
+                                      Output:["_col0","_col1","_col2","_col3","_col4"]
+                                      Filter Operator [FIL_86] (rows=42 width=34)
+                                        predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null)
+                                        TableScan [TS_9] (rows=85 width=34)
+                                          default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
+                                <-Select Operator [SEL_8] (rows=1000 width=10)
                                     Output:["_col1"]
-                                  Filter Operator [FIL_88] (rows=12 width=7)
-                                    predicate:((key = 'src1key') and value is not null)
-                                    TableScan [TS_15] (rows=25 width=7)
-                                      default@src1,src1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                            <-Map Join Operator [MAPJOIN_93] (rows=1210 width=10)
-                                Conds:MAPJOIN_92._col2=RS_32._col0(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"]
-                              <-Map 7 [BROADCAST_EDGE]
-                                BROADCAST [RS_32]
-                                  PartitionCols:_col0
-                                  Select Operator [SEL_14] (rows=250 width=10)
-                                    Output:["_col0"]
-                                    Filter Operator [FIL_87] (rows=250 width=10)
-                                      predicate:((value = 'd1value') and key is not null)
-                                      TableScan [TS_12] (rows=500 width=10)
-                                        default@src,d1,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
-                              <-Map Join Operator [MAPJOIN_92] (rows=1100 width=10)
-                                  Conds:SEL_8._col1=RS_29._col3(Inner),HybridGraceHashJoin:true,Output:["_col2","_col3","_col4","_col6"]
-                                <-Map 6 [BROADCAST_EDGE]
-                                  BROADCAST [RS_29]
-                                    PartitionCols:_col3
-                                    Select Operator [SEL_11] (rows=42 width=34)
-                                      Output:["_col0","_col1","_col2","_col3","_col4"]
-                                      Filter Operator [FIL_86] (rows=42 width=34)
-                                        predicate:((v3 = 'ssv3') and k2 is not null and k3 is not null and k1 is not null and v1 is not null and v2 is not null)
-                                        TableScan [TS_9] (rows=85 width=34)
-                                          default@ss,ss,Tbl:COMPLETE,Col:NONE,Output:["k1","v1","k2","v2","k3","v3"]
-                                <-Select Operator [SEL_8] (rows=1000 width=10)
-                                    Output:["_col1"]
-                                  Filter Operator [FIL_85] (rows=1000 width=10)
-                                    predicate:((key = 'srcpartkey') and value is not null)
-                                    TableScan [TS_6] (rows=2000 width=10)
-                                      default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                                  Filter Operator [FIL_85] (rows=1000 width=10)
+                                    predicate:((key = 'srcpartkey') and value is not null)
+                                    TableScan [TS_6] (rows=2000 width=10)
+                                      default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
 PREHOOK: query: explain SELECT x.key, z.value, y.value
diff --git ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
index 7f00b06..875bdde 100644
--- ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/tez/vector_groupby_reduce.q.out
@@ -790,6 +790,7 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -825,22 +826,33 @@ STAGE PLANS:
                     Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(_col2)
-                      keys: _col0 (type: int), _col1 (type: int)
-                      mode: complete
+                      keys: _col1 (type: int), _col0 (type: int)
+                      mode: hash
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
-                      Select Operator
-                        expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
-                        outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
-                        Reduce Output Operator
-                          key expressions: _col0 (type: int), _col1 (type: int)
-                          sort order: ++
-                          Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
-                          value expressions: _col2 (type: bigint)
+                      Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int), _col1 (type: int)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                        Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: bigint)
         Reducer 3
             Execution mode: vectorized
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: int), KEY._col1 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: int)
+                  sort order: ++
+                  Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: bigint)
+        Reducer 4
+            Execution mode: vectorized
+            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: bigint)
                outputColumnNames: _col0, _col1, _col2
diff --git ql/src/test/results/clientpositive/tez/vectorization_14.q.out ql/src/test/results/clientpositive/tez/vectorization_14.q.out
index 2a59833..8b9d98f 100644
--- ql/src/test/results/clientpositive/tez/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/tez/vectorization_14.q.out
@@ -95,14 +95,14 @@ STAGE PLANS:
                     Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
-                      keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
+                      keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                       Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
+                        key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                         sort order: +++++
-                        Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                         Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
             Execution mode: vectorized
@@ -110,12 +110,12 @@ STAGE PLANS:
             Reduce Operator Tree:
              Group By Operator
                aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
-                keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double)
+                keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
                Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                Select Operator
-                  expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double)
+                  expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
                  Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
diff --git ql/src/test/results/clientpositive/vector_groupby_reduce.q.out ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
index bc23b36..9e0abd4 100644
--- ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
+++ ql/src/test/results/clientpositive/vector_groupby_reduce.q.out
@@ -551,8 +551,7 @@ POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-2 depends on stages: Stage-1
-  Stage-3 depends on stages: Stage-2
-  Stage-0 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
@@ -562,19 +561,19 @@ STAGE PLANS:
            alias: store_sales
            Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
            Select Operator
-              expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
-              outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
+              expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+              outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity
              Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: min(ss_quantity)
-                keys: ss_item_sk (type: int), ss_ticket_number (type: int)
+                keys: ss_ticket_number (type: int), ss_item_sk (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: int), _col1 (type: int)
                  sort order: ++
-                  Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
+                  Map-reduce partition columns: _col0 (type: int)
                  Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col2 (type: int)
      Execution mode: vectorized
@@ -585,18 +584,22 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(_col0), sum(_col2)
-            keys: _col1 (type: int)
-            mode: hash
+          Select Operator
+            expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int)
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            Group By Operator
+              aggregations: sum(_col0), sum(_col2)
+              keys: _col1 (type: int)
+              mode: complete
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
  Stage: Stage-2
    Map Reduce
@@ -605,30 +608,6 @@ STAGE PLANS:
          Reduce Output Operator
            key expressions: _col0 (type: int)
            sort order: +
-            Map-reduce partition columns: _col0 (type: int)
-            Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
-            value expressions: _col1 (type: bigint), _col2 (type: bigint)
-      Reduce Operator Tree:
-        Group By Operator
-          aggregations: sum(VALUE._col0), sum(VALUE._col1)
-          keys: KEY._col0 (type: int)
-          mode: mergepartial
-          outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
-
-  Stage: Stage-3
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col0 (type: int)
-              sort order: +
            Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
            value expressions: _col1 (type: bigint), _col2 (type: bigint)
      Reduce Operator Tree:
@@ -795,12 +774,12 @@ STAGE PLANS:
            alias: store_sales
            Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
            Select Operator
-              expressions: ss_item_sk (type: int), ss_ticket_number (type: int), ss_quantity (type: int)
-              outputColumnNames: ss_item_sk, ss_ticket_number, ss_quantity
+              expressions: ss_ticket_number (type: int), ss_item_sk (type: int), ss_quantity (type: int)
+              outputColumnNames: ss_ticket_number, ss_item_sk, ss_quantity
              Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: min(ss_quantity)
-                keys: ss_item_sk (type: int), ss_ticket_number (type: int)
+                keys: ss_ticket_number (type: int), ss_item_sk (type: int)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
@@ -818,14 +797,14 @@ STAGE PLANS:
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
-          Group By Operator
-            aggregations: sum(_col2)
-            keys: _col0 (type: int), _col1 (type: int)
-            mode: complete
+          Select Operator
+            expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int)
            outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
-            Select Operator
-              expressions: _col1 (type: int), _col0 (type: int), _col2 (type: bigint)
+            Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: sum(_col2)
+              keys: _col1 (type: int), _col0 (type: int)
+              mode: complete
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
              File Output Operator
diff --git ql/src/test/results/clientpositive/vectorization_14.q.out ql/src/test/results/clientpositive/vectorization_14.q.out
index 6d4f13a..d809808 100644
--- ql/src/test/results/clientpositive/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/vectorization_14.q.out
@@ -89,26 +89,26 @@ STAGE PLANS:
            Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
            Group By Operator
              aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
-              keys: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
+              keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
              mode: hash
              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
              Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
              Reduce Output Operator
-                key expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
+                key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                sort order: +++++
-                Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double)
+                Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
      Execution mode: vectorized
      Reduce Operator Tree:
        Group By Operator
          aggregations: stddev_samp(VALUE._col0), max(VALUE._col1), stddev_pop(VALUE._col2), count(VALUE._col3), var_pop(VALUE._col4), var_samp(VALUE._col5)
-          keys: KEY._col0 (type: timestamp), KEY._col1 (type: float), KEY._col2 (type: string), KEY._col3 (type: boolean), KEY._col4 (type: double)
+          keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
          Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
          Select Operator
-            expressions: _col0 (type: timestamp), _col1 (type: float), _col2 (type: string), _col3 (type: boolean), _col4 (type: double), (-26.28 + _col4) (type: double), (- (-26.28 + _col4)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col4)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col4)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col4) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col4)) (type: double)
+            expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (UDFToDouble(_col1) * -26.28) (type: double), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
            Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
            File Output Operator