From 6d1370b81d8adba498d9fdbb2df5d8be61284060 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Thu, 17 Sep 2015 21:49:18 -0700 Subject: [PATCH] HIVE-10785 : Support aggregate push down through joins --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +- .../hadoop/hive/ql/exec/FunctionRegistry.java | 3 +- .../calcite/functions/HiveSqlCountAggFunction.java | 72 + .../functions/HiveSqlMinMaxAggFunction.java | 49 + .../calcite/functions/HiveSqlSumAggFunction.java | 125 ++ .../rules/HiveAggregateJoinTransposeRule.java | 372 +++++ .../calcite/translator/SqlFunctionConverter.java | 40 +- .../hadoop/hive/ql/parse/CalcitePlanner.java | 5 + .../hadoop/hive/ql/udf/generic/GenericUDAFSum.java | 2 +- .../ql/udf/generic/GenericUDAFSumEmptyIsZero.java | 63 + .../queries/clientpositive/groupby_join_pushdown.q | 54 + .../test/results/clientpositive/auto_join32.q.out | 206 ++- .../results/clientpositive/cbo_rp_auto_join1.q.out | 1714 +++++++++++++++++--- ql/src/test/results/clientpositive/gby_star.q.out | 137 +- .../clientpositive/groupby_join_pushdown.q.out | 1522 +++++++++++++++++ .../join_merge_multi_expressions.q.out | 218 ++- ql/src/test/results/clientpositive/lineage3.q.out | 2 +- .../results/clientpositive/mapjoin_mapjoin.q.out | 478 +++++- .../results/clientpositive/metadataonly1.q.out | 364 +++-- .../results/clientpositive/multiMapJoin2.q.out | 1713 +++++++++++++------ .../reduce_deduplicate_extended.q.out | 241 ++- .../results/clientpositive/show_functions.q.out | 1 + .../results/clientpositive/spark/auto_join32.q.out | 105 +- .../spark/join_merge_multi_expressions.q.out | 146 +- .../clientpositive/spark/mapjoin_mapjoin.q.out | 204 ++- .../spark/vectorized_nested_mapjoin.q.out | 137 +- .../tez/dynamic_partition_pruning_2.q.out | 99 +- .../results/clientpositive/tez/explainuser_1.q.out | 94 +- .../results/clientpositive/tez/explainuser_2.q.out | 1334 ++++++++++----- .../tez/hybridgrace_hashjoin_1.q.out | 392 +++-- .../clientpositive/tez/mapjoin_mapjoin.q.out | 189 ++- .../results/clientpositive/tez/metadataonly1.q.out | 54 +- ql/src/test/results/clientpositive/tez/mrr.q.out | 215 ++- .../tez/tez_dynpart_hashjoin_1.q.out | 418 +++-- .../results/clientpositive/tez/tez_join_hash.q.out | 141 +- .../results/clientpositive/tez/tez_union.q.out | 135 +- .../tez/tez_vector_dynpart_hashjoin_1.q.out | 422 +++-- .../tez/vectorized_nested_mapjoin.q.out | 105 +- .../clientpositive/vectorized_nested_mapjoin.q.out | 274 +++- 39 files changed, 9316 insertions(+), 2531 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java create mode 100644 ql/src/test/queries/clientpositive/groupby_join_pushdown.q create mode 100644 ql/src/test/results/clientpositive/groupby_join_pushdown.q.out diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index f3e2168..21572c2 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -814,7 +814,7 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { + " expressed as multiple of Local FS write cost"), HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" + " expressed as multiple of Local FS read cost"), - + AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", true, "push aggregates through join"), // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row, // need to remove by hive .13. Also, do not change default (see SMB operator) diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index f1fe30d..218b2df 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -370,6 +370,7 @@ system.registerGenericUDAF("min", new GenericUDAFMin()); system.registerGenericUDAF("sum", new GenericUDAFSum()); + system.registerGenericUDAF("$SUM0", new GenericUDAFSumEmptyIsZero()); system.registerGenericUDAF("count", new GenericUDAFCount()); system.registerGenericUDAF("avg", new GenericUDAFAverage()); system.registerGenericUDAF("std", new GenericUDAFStd()); @@ -960,7 +961,7 @@ public static GenericUDAFEvaluator getGenericUDAFEvaluator(String name, GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo( args, isDistinct, isAllColumns); - + GenericUDAFEvaluator udafEvaluator; if (udafResolver instanceof GenericUDAFResolver2) { udafEvaluator = diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java new file mode 100644 index 0000000..7937040 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.functions; + +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlSplittableAggFunction; +import org.apache.calcite.sql.SqlSplittableAggFunction.CountSplitter; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.ImmutableIntList; + +public class HiveSqlCountAggFunction extends SqlAggFunction { + + final SqlReturnTypeInference returnTypeInference; + final SqlOperandTypeInference operandTypeInference; + final SqlOperandTypeChecker operandTypeChecker; + + public HiveSqlCountAggFunction(SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) { + super( + "count", + SqlKind.OTHER_FUNCTION, + returnTypeInference, + operandTypeInference, + operandTypeChecker, + SqlFunctionCategory.NUMERIC); + this.returnTypeInference = returnTypeInference; + this.operandTypeChecker = operandTypeChecker; + this.operandTypeInference = operandTypeInference; + } + + @Override + public T unwrap(Class clazz) { + if (clazz == SqlSplittableAggFunction.class) { + return clazz.cast(new HiveCountSplitter()); + } + return super.unwrap(clazz); + } + + class HiveCountSplitter extends CountSplitter { + + @Override + public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) { + + return AggregateCall.create( + new HiveSqlCountAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker), + false, ImmutableIntList.of(), -1, + typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true), "count"); + } + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java new file mode 100644 index 0000000..77dca1f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlMinMaxAggFunction.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.functions; + +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlSplittableAggFunction; +import org.apache.calcite.sql.SqlSplittableAggFunction.SelfSplitter; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlReturnTypeInference; + +public class HiveSqlMinMaxAggFunction extends SqlAggFunction { + + public HiveSqlMinMaxAggFunction(SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, boolean isMin) { + super( + isMin ? "min" : "max", + SqlKind.OTHER_FUNCTION, + returnTypeInference, + operandTypeInference, + operandTypeChecker, + SqlFunctionCategory.NUMERIC); + } + + @Override + public T unwrap(Class clazz) { + if (clazz == SqlSplittableAggFunction.class) { + return clazz.cast(SelfSplitter.INSTANCE); + } + return super.unwrap(clazz); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java new file mode 100644 index 0000000..8f62970 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.functions; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlSplittableAggFunction; +import org.apache.calcite.sql.SqlSplittableAggFunction.SumSplitter; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.ImmutableIntList; + +import com.google.common.collect.ImmutableList; + +/** + * Sum is an aggregator which returns the sum of the values which + * go into it. It has precisely one argument of numeric type (int, + * long, float, double), and the result + * is the same type. + */ +public class HiveSqlSumAggFunction extends SqlAggFunction { + + final SqlReturnTypeInference returnTypeInference; + final SqlOperandTypeInference operandTypeInference; + final SqlOperandTypeChecker operandTypeChecker; + + //~ Constructors ----------------------------------------------------------- + + public HiveSqlSumAggFunction(SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) { + super( + "sum", + SqlKind.OTHER_FUNCTION, + returnTypeInference, + operandTypeInference, + operandTypeChecker, + SqlFunctionCategory.NUMERIC); + this.returnTypeInference = returnTypeInference; + this.operandTypeChecker = operandTypeChecker; + this.operandTypeInference = operandTypeInference; + } + + //~ Methods ---------------------------------------------------------------- + + + @Override + public T unwrap(Class clazz) { + if (clazz == SqlSplittableAggFunction.class) { + return clazz.cast(new HiveSumSplitter()); + } + return super.unwrap(clazz); + } + + class HiveSumSplitter extends SumSplitter { + + @Override + public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) { + RelDataType countRetType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true); + return AggregateCall.create( + new HiveSqlCountAggFunction(ReturnTypes.explicit(countRetType), operandTypeInference, operandTypeChecker), + false, ImmutableIntList.of(), -1, countRetType, "count"); + } + + @Override + public AggregateCall topSplit(RexBuilder rexBuilder, + Registry extra, int offset, RelDataType inputRowType, + AggregateCall aggregateCall, int leftSubTotal, int rightSubTotal) { + final List merges = new ArrayList<>(); + final List fieldList = inputRowType.getFieldList(); + if (leftSubTotal >= 0) { + final RelDataType type = fieldList.get(leftSubTotal).getType(); + merges.add(rexBuilder.makeInputRef(type, leftSubTotal)); + } + if (rightSubTotal >= 0) { + final RelDataType type = fieldList.get(rightSubTotal).getType(); + merges.add(rexBuilder.makeInputRef(type, rightSubTotal)); + } + RexNode node; + switch (merges.size()) { + case 1: + node = merges.get(0); + break; + case 2: + node = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY, merges); + node = rexBuilder.makeAbstractCast(aggregateCall.type, node); + break; + default: + throw new AssertionError("unexpected count " + merges); + } + int ordinal = extra.register(node); + return AggregateCall.create(new HiveSqlSumAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker), + false, ImmutableList.of(ordinal), -1, aggregateCall.type, aggregateCall.name); + } + } +} + +// End SqlSumAggFunction.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java new file mode 100644 index 0000000..211b6fa --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -0,0 +1,372 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.rules.AggregateJoinTransposeRule; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlSplittableAggFunction; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.mapping.Mapping; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; + +import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; + +/** + * Planner rule that pushes an + * {@link org.apache.calcite.rel.core.Aggregate} + * past a {@link org.apache.calcite.rel.core.Join}. + */ +public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule { + + /** Extended instance of the rule that can push down aggregate functions. */ + public static final HiveAggregateJoinTransposeRule INSTANCE = + new HiveAggregateJoinTransposeRule(HiveAggregate.class, HiveAggregate.HIVE_AGGR_REL_FACTORY, + HiveJoin.class, HiveJoin.HIVE_JOIN_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY, true); + + private final RelFactories.AggregateFactory aggregateFactory; + + private final RelFactories.JoinFactory joinFactory; + + private final RelFactories.ProjectFactory projectFactory; + + private final boolean allowFunctions; + + /** Creates an AggregateJoinTransposeRule that may push down functions. */ + private HiveAggregateJoinTransposeRule(Class aggregateClass, + RelFactories.AggregateFactory aggregateFactory, + Class joinClass, + RelFactories.JoinFactory joinFactory, + RelFactories.ProjectFactory projectFactory, + boolean allowFunctions) { + super(aggregateClass, aggregateFactory, joinClass, joinFactory, projectFactory, true); + this.aggregateFactory = aggregateFactory; + this.joinFactory = joinFactory; + this.projectFactory = projectFactory; + this.allowFunctions = allowFunctions; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final Aggregate aggregate = call.rel(0); + final Join join = call.rel(1); + final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); + + // If any aggregate functions do not support splitting, bail out + // If any aggregate call has a filter, bail out + for (AggregateCall aggregateCall : aggregate.getAggCallList()) { + if (aggregateCall.getAggregation().unwrap(SqlSplittableAggFunction.class) + == null) { + return; + } + if (aggregateCall.filterArg >= 0) { + return; + } + } + + // If it is not an inner join, we do not push the + // aggregate operator + if (join.getJoinType() != JoinRelType.INNER) { + return; + } + + if (!allowFunctions && !aggregate.getAggCallList().isEmpty()) { + return; + } + + // Do the columns used by the join appear in the output of the aggregate? + final ImmutableBitSet aggregateColumns = aggregate.getGroupSet(); + final ImmutableBitSet keyColumns = keyColumns(aggregateColumns, + RelMetadataQuery.getPulledUpPredicates(join).pulledUpPredicates); + final ImmutableBitSet joinColumns = + RelOptUtil.InputFinder.bits(join.getCondition()); + final boolean allColumnsInAggregate = + keyColumns.contains(joinColumns); + final ImmutableBitSet belowAggregateColumns = + aggregateColumns.union(joinColumns); + + // Split join condition + final List leftKeys = Lists.newArrayList(); + final List rightKeys = Lists.newArrayList(); + RexNode nonEquiConj = + RelOptUtil.splitJoinCondition(join.getLeft(), join.getRight(), + join.getCondition(), leftKeys, rightKeys); + // If it contains non-equi join conditions, we bail out + if (!nonEquiConj.isAlwaysTrue()) { + return; + } + + // Push each aggregate function down to each side that contains all of its + // arguments. Note that COUNT(*), because it has no arguments, can go to + // both sides. + final Map map = new HashMap<>(); + final List sides = new ArrayList<>(); + int uniqueCount = 0; + int offset = 0; + int belowOffset = 0; + for (int s = 0; s < 2; s++) { + final Side side = new Side(); + final RelNode joinInput = join.getInput(s); + int fieldCount = joinInput.getRowType().getFieldCount(); + final ImmutableBitSet fieldSet = + ImmutableBitSet.range(offset, offset + fieldCount); + final ImmutableBitSet belowAggregateKeyNotShifted = + belowAggregateColumns.intersect(fieldSet); + for (Ord c : Ord.zip(belowAggregateKeyNotShifted)) { + map.put(c.e, belowOffset + c.i); + } + final ImmutableBitSet belowAggregateKey = + belowAggregateKeyNotShifted.shift(-offset); + final boolean unique; + if (!allowFunctions) { + assert aggregate.getAggCallList().isEmpty(); + // If there are no functions, it doesn't matter as much whether we + // aggregate the inputs before the join, because there will not be + // any functions experiencing a cartesian product effect. + // + // But finding out whether the input is already unique requires a call + // to areColumnsUnique that currently (until [CALCITE-794] "Detect + // cycles when computing statistics" is fixed) places a heavy load on + // the metadata system. + // + // So we choose to imagine the the input is already unique, which is + // untrue but harmless. + // + unique = true; + } else { + final Boolean unique0 = + RelMetadataQuery.areColumnsUnique(joinInput, belowAggregateKey); + unique = unique0 != null && unique0; + } + if (unique) { + ++uniqueCount; + side.newInput = joinInput; + } else { + List belowAggCalls = new ArrayList<>(); + final SqlSplittableAggFunction.Registry + belowAggCallRegistry = registry(belowAggCalls); + final Mappings.TargetMapping mapping = + s == 0 + ? Mappings.createIdentity(fieldCount) + : Mappings.createShiftMapping(fieldCount + offset, 0, offset, + fieldCount); + for (Ord aggCall : Ord.zip(aggregate.getAggCallList())) { + final SqlAggFunction aggregation = aggCall.e.getAggregation(); + final SqlSplittableAggFunction splitter = + Preconditions.checkNotNull( + aggregation.unwrap(SqlSplittableAggFunction.class)); + final AggregateCall call1; + if (fieldSet.contains(ImmutableBitSet.of(aggCall.e.getArgList()))) { + call1 = splitter.split(aggCall.e, mapping); + } else { + call1 = splitter.other(rexBuilder.getTypeFactory(), aggCall.e); + } + if (call1 != null) { + side.split.put(aggCall.i, + belowAggregateKey.cardinality() + + belowAggCallRegistry.register(call1)); + } + } + side.newInput = aggregateFactory.createAggregate(joinInput, false, + belowAggregateKey, null, belowAggCalls); + } + offset += fieldCount; + belowOffset += side.newInput.getRowType().getFieldCount(); + sides.add(side); + } + + if (uniqueCount == 2) { + // Both inputs to the join are unique. There is nothing to be gained by + // this rule. In fact, this aggregate+join may be the result of a previous + // invocation of this rule; if we continue we might loop forever. + return; + } + + // Update condition + final Mapping mapping = (Mapping) Mappings.target( + new Function() { + @Override + public Integer apply(Integer a0) { + return map.get(a0); + } + }, + join.getRowType().getFieldCount(), + belowOffset); + final RexNode newCondition = + RexUtil.apply(mapping, join.getCondition()); + + // Create new join + RelNode newJoin = joinFactory.createJoin(sides.get(0).newInput, + sides.get(1).newInput, newCondition, join.getJoinType(), + join.getVariablesStopped(), join.isSemiJoinDone()); + + // Aggregate above to sum up the sub-totals + final List newAggCalls = new ArrayList<>(); + final int groupIndicatorCount = + aggregate.getGroupCount() + aggregate.getIndicatorCount(); + final int newLeftWidth = sides.get(0).newInput.getRowType().getFieldCount(); + final List projects = + new ArrayList<>(rexBuilder.identityProjects(newJoin.getRowType())); + for (Ord aggCall : Ord.zip(aggregate.getAggCallList())) { + final SqlAggFunction aggregation = aggCall.e.getAggregation(); + final SqlSplittableAggFunction splitter = + Preconditions.checkNotNull( + aggregation.unwrap(SqlSplittableAggFunction.class)); + final Integer leftSubTotal = sides.get(0).split.get(aggCall.i); + final Integer rightSubTotal = sides.get(1).split.get(aggCall.i); + newAggCalls.add( + splitter.topSplit(rexBuilder, registry(projects), + groupIndicatorCount, newJoin.getRowType(), aggCall.e, + leftSubTotal == null ? -1 : leftSubTotal, + rightSubTotal == null ? -1 : rightSubTotal + newLeftWidth)); + } + RelNode r = newJoin; + b: + if (allColumnsInAggregate && newAggCalls.isEmpty() && + RelOptUtil.areRowTypesEqual(r.getRowType(), aggregate.getRowType(), false)) { + // no need to aggregate + } else { + r = RelOptUtil.createProject(r, projects, null, true, projectFactory); + if (allColumnsInAggregate) { + // let's see if we can convert + List projects2 = new ArrayList<>(); + for (int key : Mappings.apply(mapping, aggregate.getGroupSet())) { + projects2.add(rexBuilder.makeInputRef(r, key)); + } + for (AggregateCall newAggCall : newAggCalls) { + final SqlSplittableAggFunction splitter = + newAggCall.getAggregation() + .unwrap(SqlSplittableAggFunction.class); + if (splitter != null) { + projects2.add( + splitter.singleton(rexBuilder, r.getRowType(), newAggCall)); + } + } + if (projects2.size() + == aggregate.getGroupSet().cardinality() + newAggCalls.size()) { + // We successfully converted agg calls into projects. + r = RelOptUtil.createProject(r, projects2, null, true, projectFactory); + break b; + } + } + r = aggregateFactory.createAggregate(r, aggregate.indicator, + Mappings.apply(mapping, aggregate.getGroupSet()), + Mappings.apply2(mapping, aggregate.getGroupSets()), newAggCalls); + } + call.transformTo(r); + } + + /** Computes the closure of a set of columns according to a given list of + * constraints. Each 'x = y' constraint causes bit y to be set if bit x is + * set, and vice versa. */ + private static ImmutableBitSet keyColumns(ImmutableBitSet aggregateColumns, + ImmutableList predicates) { + SortedMap equivalence = new TreeMap<>(); + for (RexNode pred : predicates) { + populateEquivalences(equivalence, pred); + } + ImmutableBitSet keyColumns = aggregateColumns; + for (Integer aggregateColumn : aggregateColumns) { + final BitSet bitSet = equivalence.get(aggregateColumn); + if (bitSet != null) { + keyColumns = keyColumns.union(bitSet); + } + } + return keyColumns; + } + + private static void populateEquivalences(Map equivalence, + RexNode predicate) { + switch (predicate.getKind()) { + case EQUALS: + RexCall call = (RexCall) predicate; + final List operands = call.getOperands(); + if (operands.get(0) instanceof RexInputRef) { + final RexInputRef ref0 = (RexInputRef) operands.get(0); + if (operands.get(1) instanceof RexInputRef) { + final RexInputRef ref1 = (RexInputRef) operands.get(1); + populateEquivalence(equivalence, ref0.getIndex(), ref1.getIndex()); + populateEquivalence(equivalence, ref1.getIndex(), ref0.getIndex()); + } + } + } + } + + private static void populateEquivalence(Map equivalence, + int i0, int i1) { + BitSet bitSet = equivalence.get(i0); + if (bitSet == null) { + bitSet = new BitSet(); + equivalence.put(i0, bitSet); + } + bitSet.set(i1); + } + + /** Creates a {@link org.apache.calcite.sql.SqlSplittableAggFunction.Registry} + * that is a view of a list. */ + private static SqlSplittableAggFunction.Registry + registry(final List list) { + return new SqlSplittableAggFunction.Registry() { + @Override + public int register(E e) { + int i = list.indexOf(e); + if (i < 0) { + i = list.size(); + list.add(e); + } + return i; + } + }; + } + + /** Work space for an input to a join. */ + private static class Side { + final Map split = new HashMap<>(); + RelNode newInput; + } +} + +// End AggregateJoinTransposeRule.java + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index fd78824..d59c6bb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -45,6 +45,9 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -310,6 +313,7 @@ private static String getName(GenericUDF hiveUDF) { registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in")); registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between")); registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct")); + } private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) { @@ -339,8 +343,7 @@ private static HiveToken hToken(int type, String text) { // UDAF is assumed to be deterministic public static class CalciteUDAF extends SqlAggFunction { public CalciteUDAF(String opName, SqlReturnTypeInference returnTypeInference, - SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, - ImmutableList argTypes, RelDataType retType) { + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) { super(opName, SqlKind.OTHER_FUNCTION, returnTypeInference, operandTypeInference, operandTypeChecker, SqlFunctionCategory.USER_DEFINED_FUNCTION); } @@ -367,8 +370,6 @@ public boolean isDeterministic() { private SqlReturnTypeInference returnTypeInference; private SqlOperandTypeInference operandTypeInference; private SqlOperandTypeChecker operandTypeChecker; - private ImmutableList argTypes; - private RelDataType retType; } private static CalciteUDFInfo getUDFInfo(String hiveUdfName, @@ -382,10 +383,6 @@ private static CalciteUDFInfo getUDFInfo(String hiveUdfName, typeFamilyBuilder.add(Util.first(at.getSqlTypeName().getFamily(), SqlTypeFamily.ANY)); } udfInfo.operandTypeChecker = OperandTypes.family(typeFamilyBuilder.build()); - - udfInfo.argTypes = ImmutableList. copyOf(calciteArgTypes); - udfInfo.retType = calciteRetType; - return udfInfo; } @@ -413,13 +410,34 @@ public static SqlOperator getCalciteFn(String hiveUdfName, public static SqlAggFunction getCalciteAggFn(String hiveUdfName, ImmutableList calciteArgTypes, RelDataType calciteRetType) { SqlAggFunction calciteAggFn = (SqlAggFunction) hiveToCalcite.get(hiveUdfName); + if (calciteAggFn == null) { CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); - calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker, uInf.argTypes, uInf.retType); - } + switch (hiveUdfName.toLowerCase()) { + case "sum": + calciteAggFn = new HiveSqlSumAggFunction(uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker); + break; + case "count": + calciteAggFn = new HiveSqlCountAggFunction(uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker); + break; + case "min": + calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker, true); + break; + case "max": + calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker, false); + break; + default: + calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference, + uInf.operandTypeInference, uInf.operandTypeChecker); + break; + } + } return calciteAggFn; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 0a7ce3a..9c731b8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -63,6 +63,7 @@ import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.rules.AggregateJoinTransposeRule; import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; @@ -134,6 +135,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateJoinTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveAggregateProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; @@ -885,6 +887,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE); + if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) { + hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE); + } hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java index 5a5846e..c6ffbec 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java @@ -356,7 +356,7 @@ protected Double getCurrentIntermediateResult( */ public static class GenericUDAFSumLong extends GenericUDAFEvaluator { private PrimitiveObjectInspector inputOI; - private LongWritable result; + protected LongWritable result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java new file mode 100644 index 0000000..ab7ab04 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSumEmptyIsZero.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +@Description(name = "$SUM0", value = "_FUNC_(x) - Returns the sum of a set of numbers, zero if empty") +public class GenericUDAFSumEmptyIsZero extends GenericUDAFSum { + + @Override + public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) + throws SemanticException { + if (parameters.length != 1) { + throw new UDFArgumentTypeException(parameters.length - 1, + "Exactly one argument is expected."); + } + + if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { + throw new UDFArgumentTypeException(0, + "Only primitive type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) { + case LONG: + return new SumZeroIfEmpty(); + default: + throw new UDFArgumentTypeException(0, + "Only bigint type arguments are accepted but " + + parameters[0].getTypeName() + " is passed."); + } + } + + public static class SumZeroIfEmpty extends GenericUDAFSumLong { + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + SumLongAgg myagg = (SumLongAgg) agg; + result.set(myagg.sum); + return result; + } + } +} + diff --git a/ql/src/test/queries/clientpositive/groupby_join_pushdown.q b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q new file mode 100644 index 0000000..b8ad77a --- /dev/null +++ b/ql/src/test/queries/clientpositive/groupby_join_pushdown.q @@ -0,0 +1,54 @@ +EXPLAIN +SELECT f.key, g.key, count(g.key) +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key; + +EXPLAIN +SELECT f.key, g.key +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key; + +EXPLAIN +SELECT DISTINCT f.value, g.value +FROM src f JOIN src g ON(f.value = g.value); + +EXPLAIN +SELECT f.key, g.key, COUNT(*) +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key; + +EXPLAIN +SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint) +FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint) +GROUP BY f.ctinyint, g.ctinyint ; + +EXPLAIN +SELECT f.cbigint, g.cbigint, MAX(f.cint) +FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint) +GROUP BY f.cbigint, g.cbigint ; + +explain +SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; + +explain +SELECT MIN(f.cint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; + +explain +SELECT count(f.ctinyint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; + +explain +SELECT count(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; + +explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint; + diff --git a/ql/src/test/results/clientpositive/auto_join32.q.out b/ql/src/test/results/clientpositive/auto_join32.q.out index 161ab6b..0313905 100644 --- a/ql/src/test/results/clientpositive/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/auto_join32.q.out @@ -386,7 +386,14 @@ group by s.name POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1, Stage-3 , consists of Stage-7, Stage-8, Stage-2 + Stage-7 has a backup stage: Stage-2 + Stage-4 depends on stages: Stage-7 + Stage-8 has a backup stage: Stage-2 + Stage-5 depends on stages: Stage-8 + Stage-2 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-4, Stage-5, Stage-2 STAGE PLANS: Stage: Stage-1 @@ -402,37 +409,190 @@ STAGE PLANS: expressions: name (type: string), registration (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col3 - Select Operator - expressions: _col3 (type: string), _col1 (type: string) - outputColumnNames: _col3, _col1 - Group By Operator - aggregations: count(DISTINCT _col1) - keys: _col3 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count(DISTINCT _col1) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Operator Tree: Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE File Output Operator compressed: false table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Conditional Operator + + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: string), (_col1 * _col3) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: string), (_col1 * _col3) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col2 (type: string), (_col1 * _col3) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((p = 'bar') and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out index d52586f..79be505 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out @@ -88,7 +88,10 @@ select count(*) from ( POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -104,35 +107,141 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq1:b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -178,8 +287,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -195,34 +306,60 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: key - Select Operator - expressions: key (type: int) - outputColumnNames: key - Group By Operator - keys: key (type: int) - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: key + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + Statistics: Num rows: 4 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Select Operator + Statistics: Num rows: 4 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -230,28 +367,69 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq2:subq1:b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -323,8 +501,11 @@ on src1.key = src2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 + Stage-2 depends on stages: Stage-1, Stage-3, Stage-5 + Stage-3 is a root stage Stage-4 is a root stage + Stage-5 depends on stages: Stage-4, Stage-6 + Stage-6 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -341,32 +522,26 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: key - Select Operator - expressions: key (type: int) - outputColumnNames: key - Group By Operator - aggregations: count() - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: key, $f1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -382,31 +557,85 @@ STAGE PLANS: key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: $f1 (type: bigint) TableScan Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint), $f10 (type: bigint) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 key (type: int) 1 key (type: int) - outputColumnNames: key, $f1, $f10 + 2 key (type: int) + outputColumnNames: key, $f1, $f10, $f11, $f100 + Statistics: Num rows: 2 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), $f10 (type: bigint), $f1 (type: bigint) + expressions: key (type: int), ($f11 * $f100) (type: bigint), ($f1 * $f10) (type: bigint) outputColumnNames: key, cnt1, cnt11 + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src2:subq2:b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -420,32 +649,26 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: key - Select Operator - expressions: key (type: int) - outputColumnNames: key - Group By Operator - aggregations: count() - keys: key (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: key, $f1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -453,28 +676,101 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: key, $f1, $f10 + Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from -( - select key, count(*) as cnt1 from - ( - select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq1 group by key -) src1 -join -( - select key, count(*) as cnt1 from - ( - select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key - ) subq2 group by key -) src2 -on src1.key = src2.key -PREHOOK: type: QUERY + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src1:subq1:b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY PREHOOK: Input: default@tbl1 PREHOOK: Input: default@tbl2 #### A masked pattern was here #### @@ -523,7 +819,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -539,35 +838,141 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq2:a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -623,7 +1028,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -639,35 +1047,141 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -747,7 +1261,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -763,39 +1280,145 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) mode: mergepartial - outputColumnNames: $f0 - Select Operator - expressions: $f0 (type: bigint) - outputColumnNames: $f0 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) + mode: mergepartial + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: $f0 (type: bigint) + outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq4:subq3:a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: ListSink PREHOOK: query: select count(*) from @@ -861,7 +1484,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -877,35 +1503,141 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq2:a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 8) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -951,8 +1683,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -965,41 +1699,67 @@ STAGE PLANS: predicate: (key + 1) is not null (type: boolean) Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) + expressions: (key + 1) (type: int) outputColumnNames: key Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (key + 1) (type: int) - sort order: + - Map-reduce partition columns: (key + 1) (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: subq2:a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (key + 1) is not null (type: boolean) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: key (type: int) - outputColumnNames: key - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: (key + 1) (type: int) - sort order: + - Map-reduce partition columns: (key + 1) (type: int) - Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 (key + 1) (type: int) - 1 (key + 1) (type: int) - Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() + aggregations: $sum0($f4) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -1010,7 +1770,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -1020,7 +1780,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE @@ -1036,25 +1796,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink - -PREHOOK: query: select count(*) from - (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 - join - (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 - on subq1.key = subq2.key -PREHOOK: type: QUERY -PREHOOK: Input: default@tbl1 -PREHOOK: Input: default@tbl2 -#### A masked pattern was here #### -POSTHOOK: query: select count(*) from - (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 - join - (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq2:a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key + 1) is not null (type: boolean) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: key + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 on subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@tbl1 @@ -1077,7 +1876,10 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1093,35 +1895,141 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1169,7 +2077,11 @@ select count(*) from POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-5 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1185,37 +2097,190 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + keys: + 0 key (type: int) + 1 key (type: int) + 2 key (type: int) + outputColumnNames: $f1, $f10, $f11 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (($f1 * $f10) * $f11) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: subq2:a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: subq3:a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key < 6) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -1279,7 +2344,10 @@ on subq2.key = b.key) a POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1295,35 +2363,141 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - Select Operator - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Reduce Output Operator - sort order: - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: $f1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: $f1, $f10 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ($f1 * $f10) (type: bigint) + outputColumnNames: $f4 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: $sum0($f4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: bigint) outputColumnNames: $f0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: a:b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((key < 8) and (key < 6)) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + bucketGroup: true + keys: key (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: key, $f1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/gby_star.q.out b/ql/src/test/results/clientpositive/gby_star.q.out index fb71835..3eb2ab6 100644 --- a/ql/src/test/results/clientpositive/gby_star.q.out +++ b/ql/src/test/results/clientpositive/gby_star.q.out @@ -231,7 +231,8 @@ inner join src on a.key = src.key group by a.key limit 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -248,26 +249,49 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (UDFToDouble(key) < 100.0) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Join Operator condition map: @@ -275,48 +299,61 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - keys: _col0 (type: string) - mode: hash + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 * _col3) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) < 100.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out new file mode 100644 index 0000000..17df98f --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby_join_pushdown.q.out @@ -0,0 +1,1522 @@ +PREHOOK: query: EXPLAIN +SELECT f.key, g.key, count(g.key) +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.key, count(g.key) +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col1 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key, g.key +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.key +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT DISTINCT f.value, g.value +FROM src f JOIN src g ON(f.value = g.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT DISTINCT f.value, g.value +FROM src f JOIN src g ON(f.value = g.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.key, g.key, COUNT(*) +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.key, g.key, COUNT(*) +FROM src f JOIN src g ON(f.key = g.key) +GROUP BY f.key, g.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col1 * _col3) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint) +FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.ctinyint, g.ctinyint, SUM(f.cbigint) +FROM alltypesorc f JOIN alltypesorc g ON(f.cint = g.cint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: tinyint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col3 (type: tinyint), (_col2 * _col5) (type: bigint) + outputColumnNames: _col0, _col3, _col6 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col6) + keys: _col0 (type: tinyint), _col3 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: tinyint) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: tinyint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN +SELECT f.cbigint, g.cbigint, MAX(f.cint) +FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint) +GROUP BY f.cbigint, g.cbigint +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT f.cbigint, g.cbigint, MAX(f.cint) +FROM alltypesorc f JOIN alltypesorc g ON(f.cbigint = g.cbigint) +GROUP BY f.cbigint, g.cbigint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cbigint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), cbigint (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col0) + keys: _col1 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col2 (type: bigint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: cbigint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT f.ctinyint, g.ctinyint, MIN(f.ctinyint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col0) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col2 (type: tinyint), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT MIN(f.cint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT MIN(f.cint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(_col1) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT count(f.ctinyint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT count(f.ctinyint) +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col0) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT count(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT count(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT sum(f.cint), f.ctinyint +FROM alltypesorc f JOIN alltypesorc g ON(f.ctinyint = g.ctinyint) +GROUP BY f.ctinyint, g.ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint), _col0 (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: f + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ctinyint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out index b73643e..9891c89 100644 --- a/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/join_merge_multi_expressions.q.out @@ -8,8 +8,12 @@ select count(*) from srcpart a join srcpart b on a.key = b.key and a.hr = b.hr j POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 depends on stages: Stage-1, Stage-5 + Stage-3 depends on stages: Stage-2, Stage-6 + Stage-4 depends on stages: Stage-3 + Stage-5 is a root stage + Stage-6 is a root stage + Stage-0 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -25,56 +29,62 @@ STAGE PLANS: expressions: key (type: string), hr (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - 2 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 * _col5) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -82,7 +92,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col2 * _col5) (type: bigint) + outputColumnNames: _col6 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan @@ -92,7 +144,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -104,6 +156,84 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), hr (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), hr (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/lineage3.q.out b/ql/src/test/results/clientpositive/lineage3.q.out index ad965c8..65a0eee 100644 --- a/ql/src/test/results/clientpositive/lineage3.q.out +++ b/ql/src/test/results/clientpositive/lineage3.q.out @@ -317,7 +317,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Input: default@dest_v3 #### A masked pattern was here #### -{"version":"1.0","engine":"mr","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) ctinyint) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) csmallint)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) csmallint)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6],"targets":[1],"expression":"count(default.alltypesorc.cstring1)","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2],"expression":"(a.cboolean2 = true)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2],"expression":"(a.cfloat > 0.0)","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2],"expression":"(count(default.alltypesorc.cint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} +{"version":"1.0","engine":"mr","hash":"40bccc0722002f798d0548b59e369e83","queryText":"select * from dest_v3 limit 2","edges":[{"sources":[3,4,5,6,7,8],"targets":[0],"expression":"(tok_function sum (. (tok_table_or_col $hdt$_0) $f0) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) $f1)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) $f1)))) (tok_windowvalues (preceding 2147483647) current)))","edgeType":"PROJECTION"},{"sources":[6,7],"targets":[1],"expression":"$sum0((count(*) * count(default.alltypesorc.cstring1)))","edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[9],"targets":[0,1,2],"expression":"(a.cboolean2 = true)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2],"expression":"(a.cint = a.cint)","edgeType":"PREDICATE"},{"sources":[10],"targets":[0,1,2],"expression":"(a.cfloat > 0.0)","edgeType":"PREDICATE"},{"sources":[8,6],"targets":[0,1,2],"expression":"($sum0((count(default.alltypesorc.cint) * count(*))) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"dest_v3.a"},{"id":1,"vertexType":"COLUMN","vertexId":"dest_v3.x"},{"id":2,"vertexType":"COLUMN","vertexId":"dest_v3.cboolean1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.csmallint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"TABLE","vertexId":"default.alltypesorc"},{"id":7,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":8,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":9,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean2"},{"id":10,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cfloat"}]} 38 216 false 38 229 true PREHOOK: query: drop table if exists src_dp diff --git a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out index e92f307..74fa715 100644 --- a/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out @@ -556,37 +556,27 @@ POSTHOOK: query: explain select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-13 depends on stages: Stage-1, Stage-6 , consists of Stage-16, Stage-17, Stage-2 + Stage-16 has a backup stage: Stage-2 + Stage-11 depends on stages: Stage-16 + Stage-3 depends on stages: Stage-2, Stage-11, Stage-12 + Stage-10 depends on stages: Stage-3, Stage-7 , consists of Stage-14, Stage-15, Stage-4 + Stage-14 has a backup stage: Stage-4 + Stage-8 depends on stages: Stage-14 + Stage-5 depends on stages: Stage-4, Stage-8, Stage-9 + Stage-15 has a backup stage: Stage-4 + Stage-9 depends on stages: Stage-15 + Stage-4 + Stage-17 has a backup stage: Stage-2 + Stage-12 depends on stages: Stage-17 + Stage-2 + Stage-6 is a root stage + Stage-7 is a root stage + Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-9 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:src - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - - Stage: Stage-7 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -599,14 +589,71 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Conditional Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:$hdt$_0:$INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:$hdt$_0:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col3 * _col5) (type: bigint) + outputColumnNames: _col0, _col2, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -616,30 +663,48 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-8 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-10 + Conditional Operator + + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_2:src + $hdt$_0:$INTNAME1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_2:src + $hdt$_0:$INTNAME1 TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) - Stage: Stage-3 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -649,41 +714,310 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col2 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), (_col2 * _col4) (type: bigint) + outputColumnNames: _col1, _col5 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-15 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), (_col2 * _col4) (type: bigint) + outputColumnNames: _col1, _col5 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), (_col2 * _col4) (type: bigint) + outputColumnNames: _col1, _col5 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-17 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:$hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$hdt$_0:$hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col3 * _col5) (type: bigint) + outputColumnNames: _col0, _col2, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: bigint) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col3 * _col5) (type: bigint) + outputColumnNames: _col0, _col2, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/metadataonly1.q.out b/ql/src/test/results/clientpositive/metadataonly1.q.out index 727b2e7..828fe61 100644 --- a/ql/src/test/results/clientpositive/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/metadataonly1.q.out @@ -589,13 +589,14 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-3 depends on stages: Stage-2 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-3 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -604,26 +605,29 @@ STAGE PLANS: GatherStats: false Select Operator expressions: ds (type: string) - outputColumnNames: ds + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: max(ds) + aggregations: count() + keys: _col0 (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE tag: -1 - value expressions: _col0 (type: string) + value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: - -mr-10005default.test1{ds=1} [$hdt$_1:a2] - -mr-10006default.test1{ds=2} [$hdt$_1:a2] +#### A masked pattern was here #### Path -> Partition: - -mr-10005default.test1{ds=1} +#### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + base file name: ds=1 + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 @@ -638,9 +642,9 @@ STAGE PLANS: partition_columns.types string serialization.ddl struct test1 { i32 a, double b} serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -660,9 +664,10 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 name: default.test1 - -mr-10006default.test1{ds=2} +#### A masked pattern was here #### Partition - input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + base file name: ds=2 + input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2 @@ -677,9 +682,9 @@ STAGE PLANS: partition_columns.types string serialization.ddl struct test1 { i32 a, double b} serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.NullStructSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -700,55 +705,47 @@ STAGE PLANS: name: default.test1 name: default.test1 Truncated Path -> Alias: - -mr-10005default.test1{ds=1} [$hdt$_1:a2] - -mr-10006default.test1{ds=2} [$hdt$_1:a2] + /test1/ds=1 [$hdt$_0:$hdt$_0:a2] + /test1/ds=2 [$hdt$_0:$hdt$_0:a2] Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0) + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Filter Operator - isSamplingPred: false - predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 0 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0 - columns.types string - escape.delim \ - serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types string,bigint + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: a2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE GatherStats: false - Select Operator - expressions: ds (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: 0 - auto parallelism: false + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: false TableScan GatherStats: false Reduce Output Operator @@ -763,12 +760,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10004 + base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0 - columns.types string + columns _col0,_col1 + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe @@ -776,94 +773,32 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - columns _col0 - columns.types string + columns _col0,_col1 + columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: ds=1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 1 - properties: - bucket_count -1 - columns a,b - columns.comments - columns.types int:double -#### A masked pattern was here #### - name default.test1 - partition_columns ds - partition_columns.types string - serialization.ddl struct test1 { i32 a, double b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns a,b - columns.comments - columns.types int:double -#### A masked pattern was here #### - name default.test1 - partition_columns ds - partition_columns.types string - serialization.ddl struct test1 { i32 a, double b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - name: default.test1 -#### A masked pattern was here #### - Partition - base file name: ds=2 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2 + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - bucket_count -1 - columns a,b - columns.comments - columns.types int:double -#### A masked pattern was here #### - name default.test1 - partition_columns ds - partition_columns.types string - serialization.ddl struct test1 { i32 a, double b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - bucket_count -1 - columns a,b - columns.comments - columns.types int:double -#### A masked pattern was here #### - name default.test1 - partition_columns ds - partition_columns.types string - serialization.ddl struct test1 { i32 a, double b} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test1 - name: default.test1 + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: - /test1/ds=1 [$hdt$_0:a2] - /test1/ds=2 [$hdt$_0:a2] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: @@ -873,9 +808,10 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count() + aggregations: $sum0(_col1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -897,7 +833,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -913,7 +849,7 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -936,7 +872,7 @@ STAGE PLANS: Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -962,6 +898,142 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: a2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + GatherStats: false + Select Operator + expressions: ds (type: string) + outputColumnNames: ds + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: max(ds) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: string) + auto parallelism: false + Path -> Alias: + -mr-10006default.test1{ds=1} [$hdt$_1:a2] + -mr-10007default.test1{ds=2} [$hdt$_1:a2] + Path -> Partition: + -mr-10006default.test1{ds=1} + Partition + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 1 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + -mr-10007default.test1{ds=2} + Partition + input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2 + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.NullStructSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.NullStructSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns a,b + columns.comments + columns.types int:double +#### A masked pattern was here #### + name default.test1 + partition_columns ds + partition_columns.types string + serialization.ddl struct test1 { i32 a, double b} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test1 + name: default.test1 + Truncated Path -> Alias: + -mr-10006default.test1{ds=1} [$hdt$_1:a2] + -mr-10007default.test1{ds=2} [$hdt$_1:a2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Filter Operator + isSamplingPred: false + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/multiMapJoin2.q.out index 46b717f..a1b7d48 100644 --- a/ql/src/test/results/clientpositive/multiMapJoin2.q.out +++ b/ql/src/test/results/clientpositive/multiMapJoin2.q.out @@ -743,45 +743,34 @@ GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-17 is a root stage - Stage-2 depends on stages: Stage-17 - Stage-12 depends on stages: Stage-2, Stage-8 , consists of Stage-15, Stage-16, Stage-3 - Stage-15 has a backup stage: Stage-3 - Stage-10 depends on stages: Stage-15 + Stage-1 is a root stage + Stage-15 depends on stages: Stage-1, Stage-6 , consists of Stage-21, Stage-22, Stage-2 + Stage-21 has a backup stage: Stage-2 + Stage-13 depends on stages: Stage-21 + Stage-12 depends on stages: Stage-2, Stage-8, Stage-13, Stage-14, Stage-16, Stage-17 , consists of Stage-19, Stage-20, Stage-3 + Stage-19 has a backup stage: Stage-3 + Stage-10 depends on stages: Stage-19 Stage-4 depends on stages: Stage-3, Stage-10, Stage-11 Stage-5 depends on stages: Stage-4 - Stage-16 has a backup stage: Stage-3 - Stage-11 depends on stages: Stage-16 + Stage-20 has a backup stage: Stage-3 + Stage-11 depends on stages: Stage-20 Stage-3 - Stage-18 is a root stage - Stage-8 depends on stages: Stage-18 + Stage-22 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-22 + Stage-2 + Stage-6 is a root stage + Stage-7 is a root stage + Stage-18 depends on stages: Stage-7, Stage-9 , consists of Stage-23, Stage-24, Stage-8 + Stage-23 has a backup stage: Stage-8 + Stage-16 depends on stages: Stage-23 + Stage-24 has a backup stage: Stage-8 + Stage-17 depends on stages: Stage-24 + Stage-8 + Stage-9 is a root stage Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-17 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:y1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:y1 - TableScan - alias: y1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -794,32 +783,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -827,10 +806,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-15 + Conditional Operator + + Stage: Stage-21 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-12 Conditional Operator - Stage: Stage-15 + Stage: Stage-19 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -855,13 +872,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -879,7 +896,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -887,7 +904,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -902,21 +919,21 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-16 + Stage: Stage-20 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -941,13 +958,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -965,13 +982,13 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: @@ -980,13 +997,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -994,14 +1011,75 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-18 + Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:$hdt$_2:y1 + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_1:$hdt$_2:y1 + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1012,12 +1090,30 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -1030,32 +1126,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1063,114 +1149,113 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-0 - Fetch Operator - limit: -1 - Processor Tree: - ListSink + Stage: Stage-18 + Conditional Operator -PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt -FROM (SELECT x1.key AS key - FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) - GROUP BY x1.key) tmp1 -JOIN (SELECT x2.key AS key - FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) - GROUP BY x2.key) tmp2 -ON (tmp1.key = tmp2.key) -GROUP BY tmp1.key -ORDER BY key, cnt -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Input: default@src1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt -FROM (SELECT x1.key AS key - FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) - GROUP BY x1.key) tmp1 -JOIN (SELECT x2.key AS key - FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) - GROUP BY x2.key) tmp2 -ON (tmp1.key = tmp2.key) -GROUP BY tmp1.key -ORDER BY key, cnt -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Input: default@src1 -#### A masked pattern was here #### -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-18:MAPREDLOCAL -RUN: Stage-2:MAPRED -RUN: Stage-8:MAPRED -RUN: Stage-12:CONDITIONAL -RUN: Stage-15:MAPREDLOCAL -RUN: Stage-10:MAPRED -RUN: Stage-4:MAPRED -RUN: Stage-5:MAPRED -128 1 -146 1 -150 1 -213 1 -224 1 -238 1 -255 1 -273 1 -278 1 -311 1 -369 1 -401 1 -406 1 -66 1 -98 1 -PREHOOK: query: -- When Correlation Optimizer is enabled, --- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, --- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of --- tmp1 and tmp2. The second job will do the ORDER BY. -EXPLAIN -SELECT tmp1.key as key, count(*) as cnt -FROM (SELECT x1.key AS key - FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) - GROUP BY x1.key) tmp1 -JOIN (SELECT x2.key AS key - FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) - GROUP BY x2.key) tmp2 -ON (tmp1.key = tmp2.key) -GROUP BY tmp1.key -ORDER BY key, cnt -PREHOOK: type: QUERY -POSTHOOK: query: -- When Correlation Optimizer is enabled, --- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, --- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of --- tmp1 and tmp2. The second job will do the ORDER BY. -EXPLAIN -SELECT tmp1.key as key, count(*) as cnt -FROM (SELECT x1.key AS key - FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) - GROUP BY x1.key) tmp1 -JOIN (SELECT x2.key AS key - FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) - GROUP BY x2.key) tmp2 -ON (tmp1.key = tmp2.key) -GROUP BY tmp1.key -ORDER BY key, cnt -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-2 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 - -STAGE PLANS: - Stage: Stage-9 + Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:y1 + $hdt$_1:$INTNAME1 Fetch Operator limit: -1 - $hdt$_1:$hdt$_2:y1 + Alias -> Map Local Operator Tree: + $hdt$_1:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-24 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_1:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:y1 + $hdt$_1:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1181,11 +1266,154 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - $hdt$_1:$hdt$_2:y1 + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +RUN: Stage-1:MAPRED +RUN: Stage-6:MAPRED +RUN: Stage-7:MAPRED +RUN: Stage-9:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-18:CONDITIONAL +RUN: Stage-21:MAPREDLOCAL +RUN: Stage-23:MAPREDLOCAL +RUN: Stage-13:MAPRED +RUN: Stage-16:MAPRED +RUN: Stage-12:CONDITIONAL +RUN: Stage-19:MAPREDLOCAL +RUN: Stage-10:MAPRED +RUN: Stage-4:MAPRED +RUN: Stage-5:MAPRED +128 1 +146 1 +150 1 +213 1 +224 1 +238 1 +255 1 +273 1 +278 1 +311 1 +369 1 +401 1 +406 1 +66 1 +98 1 +PREHOOK: query: -- When Correlation Optimizer is enabled, +-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +PREHOOK: type: QUERY +POSTHOOK: query: -- When Correlation Optimizer is enabled, +-- we will use two jobs. This first MR job will evaluate sub-queries of tmp1, tmp2, +-- the Join of tmp1 and tmp2, and the aggregation on the result of the Join of +-- tmp1 and tmp2. The second job will do the ORDER BY. +EXPLAIN +SELECT tmp1.key as key, count(*) as cnt +FROM (SELECT x1.key AS key + FROM src x1 JOIN src1 y1 ON (x1.key = y1.key) + GROUP BY x1.key) tmp1 +JOIN (SELECT x2.key AS key + FROM src x2 JOIN src1 y2 ON (x2.key = y2.key) + GROUP BY x2.key) tmp2 +ON (tmp1.key = tmp2.key) +GROUP BY tmp1.key +ORDER BY key, cnt +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: y1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1196,14 +1424,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1214,64 +1444,46 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan - alias: x1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: y1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Demux Operator - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -1282,25 +1494,35 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 550 Data size: 5842 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -1311,20 +1533,108 @@ STAGE PLANS: Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 263 Data size: 2755 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 526 Data size: 5510 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -1379,9 +1689,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -RUN: Stage-9:MAPREDLOCAL +RUN: Stage-1:MAPRED RUN: Stage-2:MAPRED -RUN: Stage-3:MAPRED 128 1 146 1 150 1 @@ -1436,45 +1745,55 @@ GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-10 depends on stages: Stage-2, Stage-7 , consists of Stage-12, Stage-13, Stage-3 - Stage-12 has a backup stage: Stage-3 - Stage-8 depends on stages: Stage-12 + Stage-1 is a root stage + Stage-13 depends on stages: Stage-1, Stage-6 , consists of Stage-16, Stage-17, Stage-2 + Stage-16 has a backup stage: Stage-2 + Stage-11 depends on stages: Stage-16 + Stage-10 depends on stages: Stage-2, Stage-7, Stage-11, Stage-12 , consists of Stage-14, Stage-15, Stage-3 + Stage-14 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-14 Stage-4 depends on stages: Stage-3, Stage-8, Stage-9 Stage-5 depends on stages: Stage-4 - Stage-13 has a backup stage: Stage-3 - Stage-9 depends on stages: Stage-13 + Stage-15 has a backup stage: Stage-3 + Stage-9 depends on stages: Stage-15 Stage-3 - Stage-14 is a root stage - Stage-2 depends on stages: Stage-14 + Stage-17 has a backup stage: Stage-2 + Stage-12 depends on stages: Stage-17 + Stage-2 + Stage-6 is a root stage + Stage-7 is a root stage Stage-0 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - alias: x1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: x2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: key (type: string) - mode: hash + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1482,10 +1801,48 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-13 + Conditional Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + Stage: Stage-10 Conditional Operator - Stage: Stage-12 + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -1510,13 +1867,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1534,7 +1891,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator @@ -1542,7 +1899,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1557,21 +1914,21 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 803 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-13 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -1596,13 +1953,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1620,7 +1977,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE TableScan Reduce Output Operator key expressions: _col0 (type: string) @@ -1635,13 +1992,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 150 Data size: 1606 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1649,14 +2006,75 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:x1 + $hdt$_0:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:x1 + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: x1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1667,50 +2085,54 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan - alias: x2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: x1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: key (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1752,11 +2174,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### +RUN: Stage-1:MAPRED +RUN: Stage-6:MAPRED RUN: Stage-7:MAPRED -RUN: Stage-14:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-13:CONDITIONAL +RUN: Stage-16:MAPREDLOCAL +RUN: Stage-11:MAPRED RUN: Stage-10:CONDITIONAL -RUN: Stage-13:MAPREDLOCAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-9:MAPRED RUN: Stage-4:MAPRED RUN: Stage-5:MAPRED @@ -1808,20 +2233,34 @@ GROUP BY tmp1.key ORDER BY key, cnt POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-2 depends on stages: Stage-7 - Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:$hdt$_1:x1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_1:x1 + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: x2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE TableScan alias: x1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1832,14 +2271,16 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - - Stage: Stage-2 - Map Reduce - Map Operator Tree: + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE TableScan alias: x1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -1856,46 +2297,16 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - TableScan - alias: x2 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work Reduce Operator Tree: Demux Operator - Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 @@ -1922,33 +2333,82 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 144 Data size: 1510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE Mux Operator - Statistics: Num rows: 288 Data size: 3020 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col1 + outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE + Mux Operator + Statistics: Num rows: 276 Data size: 2854 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 138 Data size: 1427 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Mux Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -2003,9 +2463,8 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 #### A masked pattern was here #### -RUN: Stage-7:MAPREDLOCAL +RUN: Stage-1:MAPRED RUN: Stage-2:MAPRED -RUN: Stage-3:MAPRED 128 1 146 1 150 1 @@ -2188,39 +2647,28 @@ SELECT * FROM ( ) x POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-15 is a root stage - Stage-4 depends on stages: Stage-15 - Stage-14 depends on stages: Stage-4, Stage-8 - Stage-2 depends on stages: Stage-14 - Stage-16 is a root stage - Stage-8 depends on stages: Stage-16 - Stage-0 depends on stages: Stage-2 - -STAGE PLANS: - Stage: Stage-15 - Map Reduce Local Work - Alias -> Map Local Tables: - null-subquery1:$hdt$_0-subquery1:$hdt$_1:$hdt$_1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - null-subquery1:$hdt$_0-subquery1:$hdt$_1:$hdt$_1:a - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Stage-3 is a root stage + Stage-13 depends on stages: Stage-3, Stage-5 , consists of Stage-19, Stage-20, Stage-4 + Stage-19 has a backup stage: Stage-4 + Stage-11 depends on stages: Stage-19 + Stage-18 depends on stages: Stage-4, Stage-11, Stage-12, Stage-8, Stage-15, Stage-16 + Stage-2 depends on stages: Stage-18 + Stage-20 has a backup stage: Stage-4 + Stage-12 depends on stages: Stage-20 + Stage-4 + Stage-5 is a root stage + Stage-7 is a root stage + Stage-17 depends on stages: Stage-7, Stage-9 , consists of Stage-21, Stage-22, Stage-8 + Stage-21 has a backup stage: Stage-8 + Stage-15 depends on stages: Stage-21 + Stage-22 has a backup stage: Stage-8 + Stage-16 depends on stages: Stage-22 + Stage-8 + Stage-9 is a root stage + Stage-0 depends on stages: Stage-2 - Stage: Stage-4 +STAGE PLANS: + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -2233,32 +2681,22 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Local Work: - Map Reduce Local Work + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2266,7 +2704,45 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-13 + Conditional Operator + + Stage: Stage-19 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery1:$hdt$_0-subquery1:$hdt$_1:$INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery1:$hdt$_0-subquery1:$hdt$_1:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: null-subquery1:$hdt$_0-subquery1:$hdt$_0:a @@ -2349,14 +2825,75 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-16 + Stage: Stage-20 Map Reduce Local Work Alias -> Map Local Tables: - null-subquery2:$hdt$_0-subquery2:$hdt$_1:$hdt$_1:a + null-subquery1:$hdt$_0-subquery1:$hdt$_1:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - null-subquery2:$hdt$_0-subquery2:$hdt$_1:$hdt$_1:a + null-subquery1:$hdt$_0-subquery1:$hdt$_1:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2367,12 +2904,30 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-8 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -2385,32 +2940,162 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-17 + Conditional Operator + + Stage: Stage-21 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:$hdt$_0-subquery2:$hdt$_1:$INTNAME1 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:$hdt$_0-subquery2:$hdt$_1:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-22 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:$hdt$_0-subquery2:$hdt$_1:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:$hdt$_0-subquery2:$hdt$_1:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2448,11 +3133,17 @@ POSTHOOK: query: SELECT * FROM ( POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -RUN: Stage-15:MAPREDLOCAL -RUN: Stage-16:MAPREDLOCAL -RUN: Stage-4:MAPRED -RUN: Stage-8:MAPRED -RUN: Stage-14:MAPREDLOCAL +RUN: Stage-3:MAPRED +RUN: Stage-5:MAPRED +RUN: Stage-7:MAPRED +RUN: Stage-9:MAPRED +RUN: Stage-13:CONDITIONAL +RUN: Stage-17:CONDITIONAL +RUN: Stage-19:MAPREDLOCAL +RUN: Stage-21:MAPREDLOCAL +RUN: Stage-11:MAPRED +RUN: Stage-15:MAPRED +RUN: Stage-18:MAPREDLOCAL RUN: Stage-2:MAPRED 0 0 diff --git a/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out b/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out index e3ebee7..46c17ff 100644 --- a/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out +++ b/ql/src/test/results/clientpositive/reduce_deduplicate_extended.q.out @@ -227,7 +227,8 @@ explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key gr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -244,42 +245,29 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: sum(_col0) + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0) - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -292,30 +280,78 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: double) + expressions: _col0 (type: string), (_col2 * _col4) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-0 Fetch Operator limit: -1 @@ -2718,7 +2754,8 @@ explain select src.key, sum(src.key) FROM src JOIN src1 ON src.key = src1.key gr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: @@ -2736,11 +2773,69 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: sum(KEY._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: double) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: double) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col2 * _col4) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -2757,14 +2852,12 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 13 Data size: 99 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2772,34 +2865,6 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Group By Operator - aggregations: sum(KEY._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col2 (type: double) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator limit: -1 diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index fbcd86a..789bedf 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -4,6 +4,7 @@ POSTHOOK: query: SHOW FUNCTIONS POSTHOOK: type: SHOWFUNCTIONS ! != +$sum0 % & * diff --git a/ql/src/test/results/clientpositive/spark/auto_join32.q.out b/ql/src/test/results/clientpositive/spark/auto_join32.q.out index 679dd79..4c73f96 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join32.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join32.q.out @@ -415,10 +415,53 @@ and v.p = 'bar' group by s.name POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 5 <- Map 4 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: ((p = 'bar') and name is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: name (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reducer 5 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Stage: Stage-1 Spark Edges: @@ -438,43 +481,51 @@ STAGE PLANS: expressions: name (type: string), registration (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col1, _col3 + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - keys: _col1 (type: string), _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Reducer 2 + Local Work: + Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(_col0) - keys: _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + input vertices: + 1 Reducer 5 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(_col0) + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out index a18d82e..aa46c62 100644 --- a/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out +++ b/ql/src/test/results/clientpositive/spark/join_merge_multi_expressions.q.out @@ -14,8 +14,12 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 7 <- Map 1 (GROUP, 2) + Reducer 9 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 7 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -30,69 +34,79 @@ STAGE PLANS: expressions: key (type: string), hr (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map 4 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map 5 - Map Operator Tree: - TableScan - alias: a - Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), hr (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 - Inner Join 0 to 2 keys: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col1 (type: string) - 2 _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), (_col2 * _col5) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string), _col0 (type: string) + 1 _col1 (type: string), _col0 (type: string) + outputColumnNames: _col2, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col2 * _col5) (type: bigint) + outputColumnNames: _col6 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +117,34 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col1 (type: string), _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out index 549911c..a234c4d 100644 --- a/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/mapjoin_mapjoin.q.out @@ -581,55 +581,107 @@ select count(*) from srcpart join src on (srcpart.value=src.value) join src src1 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 8 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 3 + Map 7 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 Local Work: Map Reduce Local Work - Map 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + + Stage: Stage-3 + Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 5 Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) + expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 6 Local Work: Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -644,55 +696,105 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2 - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Reducer 2 Local Work: Map Reduce Local Work - Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col3 * _col5) (type: bigint) + outputColumnNames: _col0, _col2, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), (_col2 * _col4) (type: bigint) + outputColumnNames: _col1, _col5 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out index 316ed63..101a351 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out @@ -8,49 +8,78 @@ explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td fr POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: v1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (ctinyint is not null and csmallint is not null) (type: boolean) - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + predicate: csmallint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: tinyint) - 1 _col0 (type: tinyint) + expressions: csmallint (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reducer 6 Local Work: Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) Execution mode: vectorized - Map 4 + + Stage: Stage-3 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: v1 Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + predicate: (ctinyint is not null and csmallint is not null) (type: boolean) + Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: csmallint (type: smallint) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) + 0 _col0 (type: tinyint) + 1 _col0 (type: tinyint) Local Work: Map Reduce Local Work Execution mode: vectorized @@ -58,7 +87,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -87,29 +117,56 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 7433 Data size: 228226 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 6758 Data size: 207479 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Local Work: Map Reduce Local Work Execution mode: vectorized Reducer 3 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 103739 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 3716 Data size: 114112 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: double) + outputColumnNames: _col4 + Statistics: Num rows: 3716 Data size: 114112 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -139,4 +196,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.065190932486892E11 +6.065190932486951E11 diff --git a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out index c8e9da4..fc9bf9e 100644 --- a/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out +++ b/ql/src/test/results/clientpositive/tez/dynamic_partition_pruning_2.q.out @@ -997,8 +997,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1011,26 +1012,19 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: s1 @@ -1040,15 +1034,54 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -1059,6 +1092,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index 7d9d99e..98c8750 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -2927,54 +2927,72 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) +Reducer 5 <- Map 4 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 3 - File Output Operator [FS_15] + File Output Operator [FS_22] compressed:false Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Group By Operator [GBY_13] - | aggregations:["count(VALUE._col0)"] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_12] - sort order: - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col0 (type: bigint) - Group By Operator [GBY_11] - aggregations:["count(_col0)"] - outputColumnNames:["_col0"] + Select Operator [SEL_20] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Merge Join Operator [MERGEJOIN_26] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_17] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_6] + | | aggregations:["count(VALUE._col0)"] + | | outputColumnNames:["_col0"] + | | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_5] + | sort order: + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: bigint) + | Group By Operator [GBY_4] + | aggregations:["count(_col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_2] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_0] + | alias:cbo_t1 + | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_18] + sort order: Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_10] - outputColumnNames:["_col0"] - Statistics:Num rows: 200 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Merge Join Operator [MERGEJOIN_19] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{} - | Statistics:Num rows: 200 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_7] - | sort order: - | Statistics:Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_2] - | Statistics:Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:cbo_t1 - | Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_8] - sort order: - Statistics:Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_5] - Statistics:Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_3] + value expressions:_col0 (type: bigint) + Group By Operator [GBY_14] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_13] + sort order: + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_12] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_10] + Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_8] alias:cbo_t2 Statistics:Num rows: 20 Data size: 262 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out index 57fcc3c..e9eabfd 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_2.q.out @@ -320,263 +320,517 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 10 <- Map 14 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) -Reducer 11 <- Reducer 10 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 16 <- Map 15 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) -Reducer 3 <- Reducer 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 28 (SIMPLE_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 17 <- Map 16 (SIMPLE_EDGE) +Reducer 18 <- Reducer 17 (SIMPLE_EDGE), Reducer 22 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE) +Reducer 21 <- Reducer 20 (SIMPLE_EDGE), Reducer 24 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE) +Reducer 24 <- Map 23 (SIMPLE_EDGE) +Reducer 26 <- Map 25 (SIMPLE_EDGE) +Reducer 27 <- Reducer 26 (SIMPLE_EDGE), Reducer 30 (SIMPLE_EDGE) +Reducer 28 <- Reducer 27 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) +Reducer 30 <- Map 29 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Reducer 4 (SIMPLE_EDGE) -Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) -Reducer 9 <- Map 13 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE) +Reducer 5 <- Reducer 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_68] + Reducer 7 + File Output Operator [FS_139] compressed:false - Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Limit [LIM_67] + Limit [LIM_138] Number of rows:100 - Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_66] + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_137] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_65] + | Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_136] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Select Operator [SEL_64] + Select Operator [SEL_135] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - Group By Operator [GBY_63] - | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_134] + | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)","$sum0(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_62] + | Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [SIMPLE_EDGE] + Reduce Output Operator [RS_133] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Group By Operator [GBY_61] - aggregations:["count(_col13)","count(_col21)","count(_col3)"] - keys:_col2 (type: string), _col12 (type: string), _col20 (type: string) + Group By Operator [GBY_132] + aggregations:["$sum0(_col12)","$sum0(_col13)","$sum0(_col14)"] + keys:_col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_60] - outputColumnNames:["_col2","_col12","_col20","_col13","_col21","_col3"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_110] + Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_130] + outputColumnNames:["_col1","_col5","_col6","_col12","_col13","_col14"] + Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_181] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"0":"_col1 (type: string), _col3 (type: string)","1":"_col15 (type: string), _col17 (type: string)"} - | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] - | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 11 [SIMPLE_EDGE] - | Reduce Output Operator [RS_58] - | key expressions:_col15 (type: string), _col17 (type: string) - | Map-reduce partition columns:_col15 (type: string), _col17 (type: string) + | keys:{"0":"_col0 (type: string), _col2 (type: string)","1":"_col2 (type: string), _col3 (type: string)"} + | outputColumnNames:["_col1","_col3","_col4","_col5","_col6","_col9","_col10","_col11"] + | Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 15 [SIMPLE_EDGE] + | Reduce Output Operator [RS_128] + | key expressions:_col2 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col2 (type: string), _col3 (type: string) | sort order:++ - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col6 (type: string), _col7 (type: string), _col14 (type: string) - | Select Operator [SEL_49] - | outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - | Merge Join Operator [MERGEJOIN_109] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"0":"_col4 (type: string), _col6 (type: string)","1":"_col2 (type: string), _col4 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col14","_col15","_col17"] - | | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 10 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_45] - | | key expressions:_col4 (type: string), _col6 (type: string) - | | Map-reduce partition columns:_col4 (type: string), _col6 (type: string) - | | sort order:++ - | | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string) - | | Merge Join Operator [MERGEJOIN_107] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"0":"_col3 (type: string)","1":"_col1 (type: string)"} - | | | outputColumnNames:["_col2","_col3","_col4","_col6"] - | | | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 14 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_42] - | | | key expressions:_col1 (type: string) - | | | Map-reduce partition columns:_col1 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_16] - | | | outputColumnNames:["_col1"] - | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_101] - | | | predicate:((key = 'src1key') and value is not null) (type: boolean) - | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_14] - | | | alias:src1 - | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 9 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_40] - | | key expressions:_col3 (type: string) - | | Map-reduce partition columns:_col3 (type: string) - | | sort order:+ - | | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col4 (type: string), _col6 (type: string) - | | Merge Join Operator [MERGEJOIN_106] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"0":"_col2 (type: string)","1":"_col0 (type: string)"} - | | | outputColumnNames:["_col2","_col3","_col4","_col6"] - | | | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 13 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_37] - | | | key expressions:_col0 (type: string) - | | | Map-reduce partition columns:_col0 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | Select Operator [SEL_13] - | | | outputColumnNames:["_col0"] - | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_100] - | | | predicate:((value = 'd1value') and key is not null) (type: boolean) - | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_11] - | | | alias:d1 - | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | |<-Reducer 8 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_35] - | | key expressions:_col2 (type: string) - | | Map-reduce partition columns:_col2 (type: string) - | | sort order:+ - | | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col3 (type: string), _col4 (type: string), _col6 (type: string) - | | Merge Join Operator [MERGEJOIN_105] - | | | condition map:[{"":"Inner Join 0 to 1"}] - | | | keys:{"0":"_col1 (type: string)","1":"_col3 (type: string)"} - | | | outputColumnNames:["_col2","_col3","_col4","_col6"] - | | | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 12 [SIMPLE_EDGE] - | | | Reduce Output Operator [RS_32] - | | | key expressions:_col3 (type: string) - | | | Map-reduce partition columns:_col3 (type: string) - | | | sort order:+ - | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - | | | Select Operator [SEL_10] - | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] - | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | | Filter Operator [FIL_99] - | | | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) - | | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | | TableScan [TS_8] - | | | alias:ss - | | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | | |<-Map 7 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_30] - | | key expressions:_col1 (type: string) - | | Map-reduce partition columns:_col1 (type: string) - | | sort order:+ - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | Select Operator [SEL_7] - | | outputColumnNames:["_col1"] - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_98] - | | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) - | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_5] - | | alias:srcpart - | | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE - | |<-Reducer 16 [SIMPLE_EDGE] - | Reduce Output Operator [RS_47] - | key expressions:_col2 (type: string), _col4 (type: string) - | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) - | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col5 (type: string) - | Merge Join Operator [MERGEJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 15 [SIMPLE_EDGE] - | | Reduce Output Operator [RS_24] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | Select Operator [SEL_19] - | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_102] - | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_17] - | | alias:sr - | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | |<-Map 17 [SIMPLE_EDGE] - | Reduce Output Operator [RS_26] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_22] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_103] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_20] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_56] - key expressions:_col1 (type: string), _col3 (type: string) - Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | Statistics:Num rows: 45 Data size: 480 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + | Group By Operator [GBY_123] + | | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)","$sum0(VALUE._col2)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | Statistics:Num rows: 45 Data size: 480 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 14 [SIMPLE_EDGE] + | Reduce Output Operator [RS_122] + | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | sort order:++++ + | Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + | Group By Operator [GBY_121] + | aggregations:["$sum0(_col11)","$sum0(_col12)","$sum0(_col13)"] + | keys:_col0 (type: string), _col5 (type: string), _col6 (type: string), _col8 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_119] + | outputColumnNames:["_col0","_col5","_col6","_col8","_col11","_col12","_col13"] + | Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_180] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"0":"_col1 (type: string), _col2 (type: string)","1":"_col0 (type: string), _col2 (type: string)"} + | | outputColumnNames:["_col0","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + | | Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 13 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_115] + | | key expressions:_col1 (type: string), _col2 (type: string) + | | Map-reduce partition columns:_col1 (type: string), _col2 (type: string) + | | sort order:++ + | | Statistics:Num rows: 82 Data size: 874 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col3 (type: bigint), _col4 (type: bigint) + | | Group By Operator [GBY_85] + | | | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | | | Statistics:Num rows: 82 Data size: 874 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 12 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_84] + | | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) + | | sort order:+++ + | | Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col3 (type: bigint), _col4 (type: bigint) + | | Group By Operator [GBY_83] + | | aggregations:["$sum0(_col8)","$sum0(_col9)"] + | | keys:_col2 (type: string), _col4 (type: string), _col5 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | | Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_81] + | | outputColumnNames:["_col2","_col4","_col5","_col8","_col9"] + | | Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + | | Merge Join Operator [MERGEJOIN_178] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"0":"_col0 (type: string)","1":"_col1 (type: string)"} + | | | outputColumnNames:["_col1","_col2","_col4","_col5","_col6","_col7"] + | | | Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 11 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_77] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: bigint) + | | | Group By Operator [GBY_32] + | | | | aggregations:["count(VALUE._col0)"] + | | | | keys:KEY._col0 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Statistics:Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + | | | |<-Map 10 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_31] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: bigint) + | | | Group By Operator [GBY_30] + | | | aggregations:["count()"] + | | | keys:_col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | | Select Operator [SEL_28] + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_169] + | | | predicate:((key = 'src1key') and value is not null) (type: boolean) + | | | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_26] + | | | alias:src1 + | | | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 18 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_79] + | | key expressions:_col1 (type: string) + | | Map-reduce partition columns:_col1 (type: string) + | | sort order:+ + | | Statistics:Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + | | Select Operator [SEL_74] + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + | | Merge Join Operator [MERGEJOIN_177] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} + | | | outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + | | | Statistics:Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 17 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_70] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: bigint) + | | | Group By Operator [GBY_40] + | | | | aggregations:["count(VALUE._col0)"] + | | | | keys:KEY._col0 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | | | |<-Map 16 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_39] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: bigint) + | | | Group By Operator [GBY_38] + | | | aggregations:["count()"] + | | | keys:_col0 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | Select Operator [SEL_36] + | | | outputColumnNames:["_col0"] + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_170] + | | | predicate:((value = 'd1value') and key is not null) (type: boolean) + | | | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_34] + | | | alias:d1 + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 22 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_72] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + | | Group By Operator [GBY_67] + | | | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | | Statistics:Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 21 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_66] + | | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | | sort order:++++ + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col4 (type: bigint), _col5 (type: bigint) + | | Group By Operator [GBY_65] + | | aggregations:["$sum0(_col9)","$sum0(_col10)"] + | | keys:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_63] + | | outputColumnNames:["_col2","_col3","_col4","_col6","_col9","_col10"] + | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | Merge Join Operator [MERGEJOIN_176] + | | | condition map:[{"":"Inner Join 0 to 1"}] + | | | keys:{"0":"_col0 (type: string)","1":"_col3 (type: string)"} + | | | outputColumnNames:["_col1","_col2","_col3","_col4","_col6","_col7","_col8"] + | | | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 20 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_59] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: bigint) + | | | Group By Operator [GBY_48] + | | | | aggregations:["count(VALUE._col0)"] + | | | | keys:KEY._col0 (type: string) + | | | | outputColumnNames:["_col0","_col1"] + | | | | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + | | | |<-Map 19 [SIMPLE_EDGE] + | | | Reduce Output Operator [RS_47] + | | | key expressions:_col0 (type: string) + | | | Map-reduce partition columns:_col0 (type: string) + | | | sort order:+ + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | | value expressions:_col1 (type: bigint) + | | | Group By Operator [GBY_46] + | | | aggregations:["count()"] + | | | keys:_col1 (type: string) + | | | outputColumnNames:["_col0","_col1"] + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | | Select Operator [SEL_44] + | | | outputColumnNames:["_col1"] + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | | Filter Operator [FIL_171] + | | | predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + | | | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + | | | TableScan [TS_42] + | | | alias:srcpart + | | | Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + | | |<-Reducer 24 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_61] + | | key expressions:_col3 (type: string) + | | Map-reduce partition columns:_col3 (type: string) + | | sort order:+ + | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: bigint), _col6 (type: bigint) + | | Group By Operator [GBY_56] + | | | aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 23 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_55] + | | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | sort order:+++++ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col5 (type: bigint), _col6 (type: bigint) + | | Group By Operator [GBY_54] + | | aggregations:["count(_col1)","count()"] + | | keys:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_52] + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_172] + | | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_50] + | | alias:ss + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 28 [SIMPLE_EDGE] + | Reduce Output Operator [RS_117] + | key expressions:_col0 (type: string), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col2 (type: string) + | sort order:++ + | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + | Group By Operator [GBY_112] + | | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 27 [SIMPLE_EDGE] + | Reduce Output Operator [RS_111] + | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | sort order:++++ + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col4 (type: bigint), _col5 (type: bigint) + | Group By Operator [GBY_110] + | aggregations:["$sum0(_col9)","$sum0(_col10)"] + | keys:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_108] + | outputColumnNames:["_col1","_col2","_col3","_col4","_col9","_col10"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | Merge Join Operator [MERGEJOIN_179] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col6","_col8"] + | | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 26 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_104] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: bigint), _col6 (type: bigint) + | | Group By Operator [GBY_93] + | | | aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 25 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_92] + | | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | sort order:+++++ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col5 (type: bigint), _col6 (type: bigint) + | | Group By Operator [GBY_91] + | | aggregations:["count()","count(_col3)"] + | | keys:_col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_89] + | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_173] + | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_87] + | | alias:sr + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 30 [SIMPLE_EDGE] + | Reduce Output Operator [RS_106] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_101] + | | aggregations:["count(VALUE._col0)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | |<-Map 29 [SIMPLE_EDGE] + | Reduce Output Operator [RS_100] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_99] + | aggregations:["count()"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_97] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_174] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_95] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 4 [SIMPLE_EDGE] + Reduce Output Operator [RS_126] + key expressions:_col0 (type: string), _col2 (type: string) + Map-reduce partition columns:_col0 (type: string), _col2 (type: string) sort order:++ - Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - value expressions:_col2 (type: string) - Merge Join Operator [MERGEJOIN_104] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} - | outputColumnNames:["_col1","_col2","_col3"] - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_51] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) - | Select Operator [SEL_1] - | outputColumnNames:["_col0","_col1","_col2","_col3"] - | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_96] - | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) - | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_0] - | alias:cs - | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE - |<-Map 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_53] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_4] - outputColumnNames:["_col0"] - Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_97] - predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_2] - alias:d1 - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: string), _col3 (type: bigint), _col4 (type: bigint) + Group By Operator [GBY_24] + | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 3 [SIMPLE_EDGE] + Reduce Output Operator [RS_23] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: bigint), _col4 (type: bigint) + Group By Operator [GBY_22] + aggregations:["$sum0(_col8)","$sum0(_col9)"] + keys:_col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_20] + outputColumnNames:["_col1","_col2","_col3","_col8","_col9"] + Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator [MERGEJOIN_175] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col7"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_16] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 11 Data size: 381 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + | Group By Operator [GBY_5] + | | aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 11 Data size: 381 Basic stats: COMPLETE Column stats: NONE + | |<-Map 1 [SIMPLE_EDGE] + | Reduce Output Operator [RS_4] + | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | sort order:++++ + | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col4 (type: bigint), _col5 (type: bigint) + | Group By Operator [GBY_3] + | aggregations:["count()","count(_col3)"] + | keys:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_1] + | outputColumnNames:["_col0","_col1","_col2","_col3"] + | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_167] + | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_0] + | alias:cs + | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [SIMPLE_EDGE] + Reduce Output Operator [RS_18] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_13] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + |<-Map 8 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_11] + aggregations:["count()"] + keys:_col0 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_9] + outputColumnNames:["_col0"] + Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_168] + predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_7] + alias:d1 + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value @@ -1514,213 +1768,461 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 10 <- Map 9 (BROADCAST_EDGE) -Map 2 <- Map 1 (BROADCAST_EDGE) -Map 3 <- Map 10 (BROADCAST_EDGE), Map 2 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) -Reducer 4 <- Map 3 (SIMPLE_EDGE) +Reducer 11 <- Map 10 (SIMPLE_EDGE), Reducer 18 (BROADCAST_EDGE) +Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) +Reducer 13 <- Reducer 12 (SIMPLE_EDGE), Reducer 23 (BROADCAST_EDGE) +Reducer 14 <- Reducer 13 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) +Reducer 15 <- Reducer 14 (SIMPLE_EDGE) +Reducer 16 <- Reducer 15 (SIMPLE_EDGE) +Reducer 18 <- Map 17 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE) +Reducer 20 <- Map 19 (SIMPLE_EDGE) +Reducer 22 <- Map 21 (SIMPLE_EDGE), Reducer 20 (BROADCAST_EDGE) +Reducer 23 <- Reducer 22 (SIMPLE_EDGE) +Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 2 (BROADCAST_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 5 - File Output Operator [FS_68] + Reducer 16 + File Output Operator [FS_139] compressed:false - Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Limit [LIM_67] + Limit [LIM_138] Number of rows:100 - Statistics:Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_66] + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_137] | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Reducer 4 [SIMPLE_EDGE] - Reduce Output Operator [RS_65] + | Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 15 [SIMPLE_EDGE] + Reduce Output Operator [RS_136] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Select Operator [SEL_64] + Select Operator [SEL_135] outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - Group By Operator [GBY_63] - | aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)"] + Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + Group By Operator [GBY_134] + | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)","$sum0(VALUE._col2)"] | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - | Statistics:Num rows: 402 Data size: 4276 Basic stats: COMPLETE Column stats: NONE - |<-Map 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_62] + | Statistics:Num rows: 24 Data size: 258 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 14 [SIMPLE_EDGE] + Reduce Output Operator [RS_133] key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) sort order:+++ - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE value expressions:_col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint) - Group By Operator [GBY_61] - aggregations:["count(_col13)","count(_col21)","count(_col3)"] - keys:_col2 (type: string), _col12 (type: string), _col20 (type: string) + Group By Operator [GBY_132] + aggregations:["$sum0(_col12)","$sum0(_col13)","$sum0(_col14)"] + keys:_col1 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Select Operator [SEL_60] - outputColumnNames:["_col2","_col12","_col20","_col13","_col21","_col3"] - Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_110] + Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_130] + outputColumnNames:["_col1","_col5","_col6","_col12","_col13","_col14"] + Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_181] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 2":"_col1 (type: string), _col3 (type: string)","Map 3":"_col15 (type: string), _col17 (type: string)"} - | outputColumnNames:["_col2","_col3","_col12","_col13","_col20","_col21"] - | Statistics:Num rows: 804 Data size: 8552 Basic stats: COMPLETE Column stats: NONE - |<-Map 2 [BROADCAST_EDGE] - | Reduce Output Operator [RS_56] - | key expressions:_col1 (type: string), _col3 (type: string) - | Map-reduce partition columns:_col1 (type: string), _col3 (type: string) + | keys:{"Reducer 5":"_col0 (type: string), _col2 (type: string)","Reducer 14":"_col2 (type: string), _col3 (type: string)"} + | outputColumnNames:["_col1","_col3","_col4","_col5","_col6","_col9","_col10","_col11"] + | Statistics:Num rows: 49 Data size: 528 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 5 [BROADCAST_EDGE] + | Reduce Output Operator [RS_126] + | key expressions:_col0 (type: string), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col2 (type: string) | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col2 (type: string) - | Map Join Operator [MAPJOIN_104] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 1":"_col0 (type: string)","Map 2":"_col0 (type: string)"} - | | outputColumnNames:["_col1","_col2","_col3"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 1 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_51] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string) - | | Select Operator [SEL_1] - | | outputColumnNames:["_col0","_col1","_col2","_col3"] - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_96] - | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_0] - | | alias:cs - | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_4] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_97] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_2] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_49] - outputColumnNames:["_col14","_col15","_col17","_col6","_col7"] - Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - Map Join Operator [MAPJOIN_109] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 3":"_col4 (type: string), _col6 (type: string)","Map 10":"_col2 (type: string), _col4 (type: string)"} - | outputColumnNames:["_col2","_col3","_col14","_col15","_col17"] - | Statistics:Num rows: 731 Data size: 7775 Basic stats: COMPLETE Column stats: NONE - |<-Map 10 [BROADCAST_EDGE] - | Reduce Output Operator [RS_47] - | key expressions:_col2 (type: string), _col4 (type: string) - | Map-reduce partition columns:_col2 (type: string), _col4 (type: string) - | sort order:++ - | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col3 (type: string), _col5 (type: string) - | Map Join Operator [MAPJOIN_108] - | | condition map:[{"":"Inner Join 0 to 1"}] - | | keys:{"Map 9":"_col0 (type: string)","Map 10":"_col0 (type: string)"} - | | outputColumnNames:["_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE - | |<-Map 9 [BROADCAST_EDGE] - | | Reduce Output Operator [RS_24] - | | key expressions:_col0 (type: string) - | | Map-reduce partition columns:_col0 (type: string) - | | sort order:+ - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | value expressions:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) - | | Select Operator [SEL_19] - | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | Filter Operator [FIL_102] - | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) - | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | | TableScan [TS_17] - | | alias:sr - | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - | |<-Select Operator [SEL_22] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_103] - | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_20] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_107] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 3":"_col3 (type: string)","Map 8":"_col1 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 665 Data size: 7069 Basic stats: COMPLETE Column stats: NONE - |<-Map 8 [BROADCAST_EDGE] - | Reduce Output Operator [RS_42] - | key expressions:_col1 (type: string) - | Map-reduce partition columns:_col1 (type: string) - | sort order:+ - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_16] - | outputColumnNames:["_col1"] - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_101] - | predicate:((key = 'src1key') and value is not null) (type: boolean) - | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_14] - | alias:src1 - | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_106] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 3":"_col2 (type: string)","Map 7":"_col0 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [BROADCAST_EDGE] - | Reduce Output Operator [RS_37] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Select Operator [SEL_13] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_100] - | predicate:((value = 'd1value') and key is not null) (type: boolean) - | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_11] - | alias:d1 - | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - |<-Map Join Operator [MAPJOIN_105] + | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string), _col3 (type: bigint), _col4 (type: bigint) + | Group By Operator [GBY_24] + | | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 4 [SIMPLE_EDGE] + | Reduce Output Operator [RS_23] + | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) + | sort order:+++ + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col3 (type: bigint), _col4 (type: bigint) + | Group By Operator [GBY_22] + | aggregations:["$sum0(_col8)","$sum0(_col9)"] + | keys:_col1 (type: string), _col2 (type: string), _col3 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_20] + | outputColumnNames:["_col1","_col2","_col3","_col8","_col9"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_175] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 2":"_col0 (type: string)","Reducer 4":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col7"] + | | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 2 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_16] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 11 Data size: 381 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + | | Group By Operator [GBY_5] + | | | aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | | Statistics:Num rows: 11 Data size: 381 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_4] + | | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | | sort order:++++ + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col4 (type: bigint), _col5 (type: bigint) + | | Group By Operator [GBY_3] + | | aggregations:["count()","count(_col3)"] + | | keys:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_1] + | | outputColumnNames:["_col0","_col1","_col2","_col3"] + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_167] + | | predicate:((k1 is not null and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 22 Data size: 762 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_0] + | | alias:cs + | | Statistics:Num rows: 170 Data size: 5890 Basic stats: COMPLETE Column stats: NONE + | |<-Group By Operator [GBY_13] + | | aggregations:["count(VALUE._col0)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | |<-Map 3 [SIMPLE_EDGE] + | Reduce Output Operator [RS_12] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_11] + | aggregations:["count()"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_9] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_168] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_7] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Group By Operator [GBY_123] + | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)","$sum0(VALUE._col2)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | Statistics:Num rows: 45 Data size: 480 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 13 [SIMPLE_EDGE] + Reduce Output Operator [RS_122] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order:++++ + Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + value expressions:_col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint) + Group By Operator [GBY_121] + aggregations:["$sum0(_col11)","$sum0(_col12)","$sum0(_col13)"] + keys:_col0 (type: string), _col5 (type: string), _col6 (type: string), _col8 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_119] + outputColumnNames:["_col0","_col5","_col6","_col8","_col11","_col12","_col13"] + Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_180] | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{"Map 3":"_col1 (type: string)","Map 6":"_col3 (type: string)"} - | outputColumnNames:["_col2","_col3","_col4","_col6"] - | Statistics:Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - |<-Map 6 [BROADCAST_EDGE] - | Reduce Output Operator [RS_32] - | key expressions:_col3 (type: string) - | Map-reduce partition columns:_col3 (type: string) - | sort order:+ - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) - | Select Operator [SEL_10] - | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | Filter Operator [FIL_99] - | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) - | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE - | TableScan [TS_8] - | alias:ss - | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE - |<-Select Operator [SEL_7] - outputColumnNames:["_col1"] - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_98] - predicate:((key = 'srcpartkey') and value is not null) (type: boolean) - Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TableScan [TS_5] - alias:srcpart - Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + | keys:{"Reducer 13":"_col1 (type: string), _col2 (type: string)","Reducer 23":"_col0 (type: string), _col2 (type: string)"} + | outputColumnNames:["_col0","_col3","_col4","_col5","_col6","_col8","_col9","_col10"] + | Statistics:Num rows: 90 Data size: 961 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 23 [BROADCAST_EDGE] + | Reduce Output Operator [RS_117] + | key expressions:_col0 (type: string), _col2 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col2 (type: string) + | sort order:++ + | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: string), _col3 (type: string), _col4 (type: bigint), _col5 (type: bigint) + | Group By Operator [GBY_112] + | | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 34 Data size: 361 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 22 [SIMPLE_EDGE] + | Reduce Output Operator [RS_111] + | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + | sort order:++++ + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col4 (type: bigint), _col5 (type: bigint) + | Group By Operator [GBY_110] + | aggregations:["$sum0(_col9)","$sum0(_col10)"] + | keys:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_108] + | outputColumnNames:["_col1","_col2","_col3","_col4","_col9","_col10"] + | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | Map Join Operator [MAPJOIN_179] + | | condition map:[{"":"Inner Join 0 to 1"}] + | | keys:{"Reducer 20":"_col0 (type: string)","Reducer 22":"_col0 (type: string)"} + | | outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col6","_col8"] + | | Statistics:Num rows: 68 Data size: 723 Basic stats: COMPLETE Column stats: NONE + | |<-Reducer 20 [BROADCAST_EDGE] + | | Reduce Output Operator [RS_104] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: bigint), _col6 (type: bigint) + | | Group By Operator [GBY_93] + | | | aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + | | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + | | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | | |<-Map 19 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_92] + | | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | | sort order:+++++ + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | value expressions:_col5 (type: bigint), _col6 (type: bigint) + | | Group By Operator [GBY_91] + | | aggregations:["count()","count(_col3)"] + | | keys:_col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Select Operator [SEL_89] + | | outputColumnNames:["_col0","_col2","_col3","_col4","_col5"] + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | Filter Operator [FIL_173] + | | predicate:((((((v1 = 'srv1') and k1 is not null) and k2 is not null) and k3 is not null) and v2 is not null) and v3 is not null) (type: boolean) + | | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | | TableScan [TS_87] + | | alias:sr + | | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + | |<-Group By Operator [GBY_101] + | | aggregations:["count(VALUE._col0)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | |<-Map 21 [SIMPLE_EDGE] + | Reduce Output Operator [RS_100] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_99] + | aggregations:["count()"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_97] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_174] + | predicate:((value) IN ('2000Q1', '2000Q2', '2000Q3') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_95] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Group By Operator [GBY_85] + | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 82 Data size: 874 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 12 [SIMPLE_EDGE] + Reduce Output Operator [RS_84] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order:+++ + Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + value expressions:_col3 (type: bigint), _col4 (type: bigint) + Group By Operator [GBY_83] + aggregations:["$sum0(_col8)","$sum0(_col9)"] + keys:_col2 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_81] + outputColumnNames:["_col2","_col4","_col5","_col8","_col9"] + Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_178] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 7":"_col0 (type: string)","Reducer 12":"_col1 (type: string)"} + | outputColumnNames:["_col1","_col2","_col4","_col5","_col6","_col7"] + | Statistics:Num rows: 165 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 7 [BROADCAST_EDGE] + | Reduce Output Operator [RS_77] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_32] + | | aggregations:["count(VALUE._col0)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 3 Data size: 22 Basic stats: COMPLETE Column stats: NONE + | |<-Map 6 [SIMPLE_EDGE] + | Reduce Output Operator [RS_31] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_30] + | aggregations:["count()"] + | keys:_col1 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_28] + | outputColumnNames:["_col1"] + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_169] + | predicate:((key = 'src1key') and value is not null) (type: boolean) + | Statistics:Num rows: 6 Data size: 45 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_26] + | alias:src1 + | Statistics:Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + |<-Select Operator [SEL_74] + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_177] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 9":"_col0 (type: string)","Reducer 12":"_col0 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4","_col5","_col6","_col7"] + | Statistics:Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 9 [BROADCAST_EDGE] + | Reduce Output Operator [RS_70] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_40] + | | aggregations:["count(VALUE._col0)"] + | | keys:KEY._col0 (type: string) + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 62 Data size: 658 Basic stats: COMPLETE Column stats: NONE + | |<-Map 8 [SIMPLE_EDGE] + | Reduce Output Operator [RS_39] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col1 (type: bigint) + | Group By Operator [GBY_38] + | aggregations:["count()"] + | keys:_col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_36] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_170] + | predicate:((value = 'd1value') and key is not null) (type: boolean) + | Statistics:Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_34] + | alias:d1 + | Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + |<-Group By Operator [GBY_67] + | aggregations:["$sum0(VALUE._col0)","$sum0(VALUE._col1)"] + | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + | Statistics:Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 11 [SIMPLE_EDGE] + Reduce Output Operator [RS_66] + key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + sort order:++++ + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions:_col4 (type: bigint), _col5 (type: bigint) + Group By Operator [GBY_65] + aggregations:["$sum0(_col9)","$sum0(_col10)"] + keys:_col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string) + outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5"] + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_63] + outputColumnNames:["_col2","_col3","_col4","_col6","_col9","_col10"] + Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Map Join Operator [MAPJOIN_176] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{"Reducer 11":"_col0 (type: string)","Reducer 18":"_col3 (type: string)"} + | outputColumnNames:["_col1","_col2","_col3","_col4","_col6","_col7","_col8"] + | Statistics:Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 18 [BROADCAST_EDGE] + | Reduce Output Operator [RS_61] + | key expressions:_col3 (type: string) + | Map-reduce partition columns:_col3 (type: string) + | sort order:+ + | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: bigint), _col6 (type: bigint) + | Group By Operator [GBY_56] + | | aggregations:["count(VALUE._col0)","count(VALUE._col1)"] + | | keys:KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: string) + | | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | | Statistics:Num rows: 1 Data size: 34 Basic stats: COMPLETE Column stats: NONE + | |<-Map 17 [SIMPLE_EDGE] + | Reduce Output Operator [RS_55] + | key expressions:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | Map-reduce partition columns:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | sort order:+++++ + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | value expressions:_col5 (type: bigint), _col6 (type: bigint) + | Group By Operator [GBY_54] + | aggregations:["count(_col1)","count()"] + | keys:_col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Select Operator [SEL_52] + | outputColumnNames:["_col0","_col1","_col2","_col3","_col4"] + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | Filter Operator [FIL_172] + | predicate:((((((v3 = 'ssv3') and v2 is not null) and k1 is not null) and v1 is not null) and k2 is not null) and k3 is not null) (type: boolean) + | Statistics:Num rows: 2 Data size: 69 Basic stats: COMPLETE Column stats: NONE + | TableScan [TS_50] + | alias:ss + | Statistics:Num rows: 85 Data size: 2945 Basic stats: COMPLETE Column stats: NONE + |<-Group By Operator [GBY_48] + | aggregations:["count(VALUE._col0)"] + | keys:KEY._col0 (type: string) + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + |<-Map 10 [SIMPLE_EDGE] + Reduce Output Operator [RS_47] + key expressions:_col0 (type: string) + Map-reduce partition columns:_col0 (type: string) + sort order:+ + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions:_col1 (type: bigint) + Group By Operator [GBY_46] + aggregations:["count()"] + keys:_col1 (type: string) + outputColumnNames:["_col0","_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator [SEL_44] + outputColumnNames:["_col1"] + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator [FIL_171] + predicate:((key = 'srcpartkey') and value is not null) (type: boolean) + Statistics:Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan [TS_42] + alias:srcpart + Statistics:Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE PREHOOK: query: explain SELECT x.key, z.value, y.value diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index 4b39b2c..12e3f30 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -45,8 +45,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -61,25 +62,19 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: c @@ -91,15 +86,53 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +143,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -164,8 +211,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -180,26 +228,19 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 4505 Data size: 968719 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: c @@ -211,15 +252,54 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 2252 Data size: 484359 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -230,6 +310,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -280,8 +374,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -296,25 +391,19 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: c @@ -326,15 +415,53 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -345,6 +472,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -395,8 +536,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -411,26 +553,19 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 3 - Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 4 Map Operator Tree: TableScan alias: c @@ -442,15 +577,54 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -461,6 +635,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out index 0bf689b..1246c86 100644 --- a/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/mapjoin_mapjoin.q.out @@ -579,8 +579,11 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (BROADCAST_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -595,41 +598,19 @@ STAGE PLANS: expressions: key (type: string), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2 - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - keys: _col2 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Map 3 + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: bigint) + Map 5 Map Operator Tree: TableScan alias: src @@ -641,12 +622,19 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 7 Map Operator Tree: TableScan alias: src @@ -658,30 +646,135 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col2, _col3, _col5 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: string), _col2 (type: string), (_col3 * _col5) (type: bigint) + outputColumnNames: _col0, _col2, _col6 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col6) + keys: _col0 (type: string), _col2 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: $sum0(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2, _col4 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: string), (_col2 * _col4) (type: bigint) + outputColumnNames: _col1, _col5 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 800 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/metadataonly1.q.out b/ql/src/test/results/clientpositive/tez/metadataonly1.q.out index b55cb7e..8df415b 100644 --- a/ql/src/test/results/clientpositive/tez/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/tez/metadataonly1.q.out @@ -620,9 +620,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -635,13 +636,20 @@ STAGE PLANS: expressions: ds (type: string) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - tag: 0 - auto parallelism: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: -1 + value expressions: _col1 (type: bigint) + auto parallelism: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -728,7 +736,7 @@ STAGE PLANS: Truncated Path -> Alias: /test1/ds=1 [a2] /test1/ds=2 [a2] - Map 4 + Map 5 Map Operator Tree: TableScan alias: a2 @@ -838,16 +846,34 @@ STAGE PLANS: Reducer 2 Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + tag: 0 + value expressions: _col1 (type: bigint) + auto parallelism: true + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) + outputColumnNames: _col1 Position of Big Table: 0 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Group By Operator - aggregations: count() + aggregations: $sum0(_col1) mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -857,11 +883,11 @@ STAGE PLANS: tag: -1 value expressions: _col0 (type: bigint) auto parallelism: false - Reducer 3 + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -886,7 +912,7 @@ STAGE PLANS: TotalFiles: 1 GatherStats: false MultiFileSpray: false - Reducer 5 + Reducer 6 Needs Tagging: false Reduce Operator Tree: Group By Operator diff --git a/ql/src/test/results/clientpositive/tez/mrr.q.out b/ql/src/test/results/clientpositive/tez/mrr.q.out index faace21..0dd5a44 100644 --- a/ql/src/test/results/clientpositive/tez/mrr.q.out +++ b/ql/src/test/results/clientpositive/tez/mrr.q.out @@ -405,9 +405,11 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -422,13 +424,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Map 5 + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 6 Map Operator Tree: TableScan alias: s1 @@ -440,13 +446,31 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 3 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 @@ -454,48 +478,57 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: string), _col1 (type: string) + aggregations: count(_col1) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: string) + sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) - Reducer 4 + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator @@ -834,9 +867,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -851,28 +885,17 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash outputColumnNames: _col0, _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - Map 4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Map 5 Map Operator Tree: TableScan alias: s1 @@ -884,42 +907,84 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: complete + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: bigint) - sort order: + - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + input vertices: + 1 Reducer 6 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 137 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: bigint) + sort order: + + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 68 Data size: 722 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 68 Data size: 724 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out index 8a46807..a512317 100644 --- a/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_1.q.out @@ -149,8 +149,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -165,12 +167,19 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 5 Map Operator Tree: TableScan alias: a @@ -182,33 +191,59 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -219,6 +254,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -269,9 +318,11 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -286,13 +337,19 @@ STAGE PLANS: expressions: csmallint (type: smallint), cint (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: smallint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint) - Map 5 + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Map 6 Map Operator Tree: TableScan alias: a @@ -304,59 +361,98 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: smallint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), (_col2 * _col4) (type: bigint) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) - Reducer 4 + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -543,8 +639,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -559,11 +656,18 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 4 Map Operator Tree: TableScan @@ -576,36 +680,54 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: int) - 1 KEY.reducesinkkey0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -616,6 +738,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -666,9 +802,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -683,12 +820,18 @@ STAGE PLANS: expressions: csmallint (type: smallint), cint (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: smallint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Map 5 Map Operator Tree: TableScan @@ -701,62 +844,93 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: int) - 1 KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - keys: _col0 (type: smallint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: smallint), (_col2 * _col4) (type: bigint) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out b/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out index 59c8df4..5f7daed 100644 --- a/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out @@ -142,9 +142,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 6 <- Map 7 (BROADCAST_EDGE), Union 3 (CONTAINS) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 7 (BROADCAST_EDGE), Union 3 (CONTAINS) - Reducer 4 <- Union 3 (SIMPLE_EDGE) + Map 6 <- Union 3 (CONTAINS) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Reducer 8 (BROADCAST_EDGE), Union 3 (SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -194,29 +195,18 @@ STAGE PLANS: expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 7 - Statistics: Num rows: 2310 Data size: 24541 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2310 Data size: 24541 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2310 Data size: 24541 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2100 Data size: 22310 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2100 Data size: 22310 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Map 7 Map Operator Tree: TableScan @@ -229,16 +219,18 @@ STAGE PLANS: expressions: value (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Merge Join Operator @@ -253,48 +245,67 @@ STAGE PLANS: expressions: _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2100 Data size: 22310 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2100 Data size: 22310 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1050 Data size: 11155 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 1050 Data size: 11155 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1 + outputColumnNames: _col2, _col4 input vertices: - 1 Map 7 - Statistics: Num rows: 2310 Data size: 24541 Basic stats: COMPLETE Column stats: NONE + 1 Reducer 8 + Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE HybridGraceHashJoin: true - Group By Operator - aggregations: count() - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2310 Data size: 24541 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 2310 Data size: 24541 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: bigint) - Reducer 4 + Select Operator + expressions: (_col2 * _col4) (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) + keys: KEY._col0 (type: string) mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col2 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1155 Data size: 12270 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/tez/tez_union.q.out b/ql/src/test/results/clientpositive/tez/tez_union.q.out index 5a7d0d6..df073d8 100644 --- a/ql/src/test/results/clientpositive/tez/tez_union.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_union.q.out @@ -163,11 +163,13 @@ STAGE PLANS: Tez Edges: Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) - Map 6 <- Union 7 (CONTAINS) - Map 8 <- Union 7 (CONTAINS) - Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Map 10 <- Union 8 (CONTAINS) + Map 6 <- Union 2 (CONTAINS) + Map 7 <- Union 8 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 9 <- Union 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -182,12 +184,19 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 10 Map Operator Tree: TableScan alias: src @@ -199,11 +208,18 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Map 6 Map Operator Tree: TableScan @@ -216,12 +232,19 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 8 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 7 Map Operator Tree: TableScan alias: src @@ -233,33 +256,59 @@ STAGE PLANS: expressions: key (type: string) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + outputColumnNames: _col1, _col3 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 4 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 5 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -270,10 +319,24 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Union 2 Vertex: Union 2 - Union 7 - Vertex: Union 7 + Union 8 + Vertex: Union 8 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out index ba35e4c..622985b 100644 --- a/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/tez_vector_dynpart_hashjoin_1.q.out @@ -149,8 +149,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -165,12 +167,19 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Map 5 Map Operator Tree: TableScan alias: a @@ -182,33 +191,59 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + outputColumnNames: _col1, _col3 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reducer 3 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -219,6 +254,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -269,9 +318,11 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -286,13 +337,19 @@ STAGE PLANS: expressions: csmallint (type: smallint), cint (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: smallint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint) - Map 5 + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Map 6 Map Operator Tree: TableScan alias: a @@ -304,59 +361,98 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: smallint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - keys: _col0 (type: smallint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Reducer 3 + outputColumnNames: _col0, _col2, _col4 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: smallint), (_col2 * _col4) (type: bigint) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) - Reducer 4 + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator @@ -547,8 +643,9 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -563,11 +660,18 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Map 4 Map Operator Tree: @@ -581,38 +685,55 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Reducer 2 Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: int) - 1 KEY.reducesinkkey0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Execution mode: vectorized + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 5 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: (_col1 * _col3) (type: bigint) + outputColumnNames: _col4 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -623,6 +744,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Stage: Stage-0 @@ -674,9 +809,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -691,12 +827,18 @@ STAGE PLANS: expressions: csmallint (type: smallint), cint (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: smallint), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: smallint) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) Execution mode: vectorized Map 5 Map Operator Tree: @@ -710,66 +852,96 @@ STAGE PLANS: expressions: cint (type: int) outputColumnNames: _col0 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Reducer 2 Reduce Operator Tree: - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 KEY.reducesinkkey0 (type: int) - 1 KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 5 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - keys: _col0 (type: smallint) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) - Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) - Execution mode: vectorized + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1536 Data size: 330245 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2, _col4 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col0 (type: smallint), (_col2 * _col4) (type: bigint) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: $sum0(_col5) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 1689 Data size: 363269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Reducer 3 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: $sum0(VALUE._col0) keys: KEY._col0 (type: smallint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: bigint) sort order: + - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint) - Execution mode: vectorized Reducer 4 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1689 Data size: 363162 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 844 Data size: 181526 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 768 Data size: 165122 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out index 74be17b..ddc3c19 100644 --- a/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out @@ -14,8 +14,10 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE), Reducer 6 (BROADCAST_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -64,28 +66,20 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - input vertices: - 1 Map 4 - Statistics: Num rows: 7433 Data size: 1598388 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Execution mode: vectorized - Map 4 + Map 5 Map Operator Tree: TableScan alias: v1 @@ -97,16 +91,56 @@ STAGE PLANS: expressions: csmallint (type: smallint) outputColumnNames: _col0 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: smallint) - sort order: + - Map-reduce partition columns: _col0 (type: smallint) + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) Execution mode: vectorized Reducer 3 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col1, _col3 + input vertices: + 1 Reducer 6 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: (_col1 * _col3) (type: double) + outputColumnNames: _col4 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +152,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized Stage: Stage-0 Fetch Operator @@ -133,4 +182,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.065190932486892E11 +6.06519093248695E11 diff --git a/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out index 6a9532e..8c8f4d7 100644 --- a/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/vectorized_nested_mapjoin.q.out @@ -7,22 +7,222 @@ POSTHOOK: query: -- SORT_QUERY_RESULTS explain select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble as td from alltypesorc v1, alltypesorc v2 where v1.ctinyint=v2.ctinyint) t1 join alltypesorc v3 on t1.tsi=v3.csmallint POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-3 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-3 + Stage-6 is a root stage + Stage-9 depends on stages: Stage-2, Stage-6 , consists of Stage-11, Stage-12, Stage-3 + Stage-11 has a backup stage: Stage-3 + Stage-7 depends on stages: Stage-11 + Stage-4 depends on stages: Stage-3, Stage-7, Stage-8 + Stage-12 has a backup stage: Stage-3 + Stage-8 depends on stages: Stage-12 + Stage-3 + Stage-13 is a root stage + Stage-2 depends on stages: Stage-13 + Stage-0 depends on stages: Stage-4 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + alias: v1 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: csmallint is not null (type: boolean) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: csmallint (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: smallint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-9 + Conditional Operator + + Stage: Stage-11 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:v1 + $hdt$_0:$INTNAME1 Fetch Operator limit: -1 - $hdt$_1:v1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME1 + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: double) + outputColumnNames: _col4 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-12 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$INTNAME + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:$INTNAME + TableScan + HashTable Sink Operator + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: double) + outputColumnNames: _col4 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Local Work: + Map Reduce Local Work + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + TableScan + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col1, _col3 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (_col1 * _col3) (type: double) + outputColumnNames: _col4 + Statistics: Num rows: 3716 Data size: 799194 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col4) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-13 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:v1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:v1 + $hdt$_0:$hdt$_0:$hdt$_0:$hdt$_0:v1 TableScan alias: v1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE @@ -37,23 +237,8 @@ STAGE PLANS: keys: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) - $hdt$_1:v1 - TableScan - alias: v1 - Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: csmallint is not null (type: boolean) - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: csmallint (type: smallint) - outputColumnNames: _col0 - Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - Stage: Stage-3 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -78,39 +263,34 @@ STAGE PLANS: expressions: _col1 (type: smallint), _col2 (type: double) outputColumnNames: _col0, _col1 Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: smallint) - 1 _col0 (type: smallint) - outputColumnNames: _col1 - Statistics: Num rows: 7433 Data size: 1598388 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col1) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: double) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: smallint) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 6758 Data size: 1453080 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) Local Work: Map Reduce Local Work Execution mode: vectorized Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: smallint) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3379 Data size: 726540 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -126,4 +306,4 @@ POSTHOOK: query: select sum(t1.td) from (select v1.csmallint as tsi, v1.cdouble POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### -6.065190932486892E11 +6.06519093248695E11 -- 1.7.12.4 (Apple Git-37)