diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 953e52c..1bfa132 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1314,6 +1314,17 @@ public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { "we are increasing the number of files possibly by a big margin. So, we merge aggressively."), HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."), + HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE("hive.optimize.limitjointranspose", false, + "Whether to push a limit through left/right outer join. If the value is true and the size of the outer\n" + + "input is reduced enough (as specified in hive.optimize.limitjointranspose.reduction), the limit is pushed\n" + + "to the outer input; to remain semantically correct, the limit is kept on top of the join too."), + HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limitjointranspose.reductionpercentage", 1.0f, + "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction of the\n" + + "size of the outer input of the join that we should get in order to apply the rule."), + HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limitjointranspose.reductiontuples", (long) 0, + "When hive.optimize.limitjointranspose is true, this variable specifies the minimal reduction in the\n" + + "number of tuples of the outer input of the join that you should get in order to apply the rule."), + HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false, "Whether to create a separate plan for skewed keys for the tables in the join.\n" + "This is based on the skewed keys stored in the metadata. At compile time, the plan is broken\n" + diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 402914c..92ba29a 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -369,6 +369,7 @@ minitez.query.files=bucket_map_join_tez1.q,\ tez_insert_overwrite_local_directory_1.q,\ tez_dynpart_hashjoin_1.q,\ tez_dynpart_hashjoin_2.q,\ + tez_dynpart_hashjoin_3.q,\ tez_vector_dynpart_hashjoin_1.q,\ tez_vector_dynpart_hashjoin_2.q,\ tez_join_hash.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java index 82fdc0e..3077177 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSortLimit.java @@ -44,6 +44,8 @@ // 4. This is used by ASTConverter after we are done with Calcite Planning private ImmutableMap mapOfInputRefToRexCall; + private boolean ruleCreated; + public HiveSortLimit(RelOptCluster cluster, RelTraitSet traitSet, RelNode child, RelCollation collation, RexNode offset, RexNode fetch) { super(cluster, TraitsUtil.getSortTraitSet(cluster, traitSet, collation), child, collation, @@ -74,7 +76,10 @@ public HiveSortLimit copy(RelTraitSet traitSet, RelNode newInput, RelCollation n // TODO: can we blindly copy sort trait? What if inputs changed and we // are now sorting by different cols RelCollation canonizedCollation = traitSet.canonize(newCollation); - return new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch); + HiveSortLimit sortLimit = + new HiveSortLimit(getCluster(), traitSet, newInput, canonizedCollation, offset, fetch); + sortLimit.setRuleCreated(ruleCreated); + return sortLimit; } public RexNode getFetchExpr() { @@ -93,6 +98,14 @@ public void setInputRefToCallMap(ImmutableMap refToCall) { public void implement(Implementor implementor) { } + public boolean isRuleCreated() { + return ruleCreated; + } + + public void setRuleCreated(boolean ruleCreated) { + this.ruleCreated = ruleCreated; + } + private static class HiveSortRelFactory implements RelFactories.SortFactory { @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java new file mode 100644 index 0000000..9d74b19 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveProjectSortTransposeRule.java @@ -0,0 +1,66 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +import com.google.common.collect.ImmutableList; + +public class HiveProjectSortTransposeRule extends RelOptRule { + + public static final HiveProjectSortTransposeRule INSTANCE = + new HiveProjectSortTransposeRule(); + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveProjectSortTransposeRule. + */ + private HiveProjectSortTransposeRule() { + super( + operand( + HiveProject.class, + operand(HiveSortLimit.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + final HiveProject project = call.rel(0); + final HiveSortLimit sort = call.rel(1); + + // Determine mapping between project input and output fields. If sort + // relies on non-trivial expressions, we can't push. + final Mappings.TargetMapping map = + RelOptUtil.permutation( + project.getProjects(), project.getInput().getRowType()).inverse(); + for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { + if (map.getTarget(fc.getFieldIndex()) < 0) { + return; + } + } + + // Create new collation + final RelCollation newCollation = + RelCollationTraitDef.INSTANCE.canonize( + RexUtil.apply(map, sort.getCollation())); + + // New operators + final RelNode newProject = project.copy(sort.getInput().getTraitSet(), + ImmutableList.of(sort.getInput())); + final HiveSortLimit newSort = sort.copy(newProject.getTraitSet(), + newProject, newCollation, sort.offset, sort.fetch); + + call.transformTo(newSort); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java new file mode 100644 index 0000000..d8b9c54 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortJoinReduceRule.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexLiteral; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +/** + * Planner rule that pushes + * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit} + * past a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin}. + */ +public class HiveSortJoinReduceRule extends RelOptRule { + + public static final HiveSortJoinReduceRule INSTANCE = + new HiveSortJoinReduceRule(); + + //~ Constructors ----------------------------------------------------------- + + private HiveSortJoinReduceRule() { + super( + operand( + HiveSortLimit.class, + operand(HiveJoin.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + final HiveJoin join = call.rel(1); + + // If sort does not contain a limit operation, we bail out + if (!HiveCalciteUtil.limitRelNode(sortLimit)) { + return false; + } + + // 1) If join is not a left or right outer, we bail out + // 2) If any sort column is not part of the input where the + // sort is pushed, we bail out + RelNode reducedInput; + if (join.getJoinType() == JoinRelType.LEFT) { + reducedInput = join.getLeft(); + if (sortLimit.getCollation() != RelCollations.EMPTY) { + for (RelFieldCollation relFieldCollation + : sortLimit.getCollation().getFieldCollations()) { + if (relFieldCollation.getFieldIndex() + >= join.getLeft().getRowType().getFieldCount()) { + return false; + } + } + } + } else if (join.getJoinType() == JoinRelType.RIGHT) { + reducedInput = join.getRight(); + if (sortLimit.getCollation() != RelCollations.EMPTY) { + for (RelFieldCollation relFieldCollation + : sortLimit.getCollation().getFieldCollations()) { + if (relFieldCollation.getFieldIndex() + < join.getLeft().getRowType().getFieldCount()) { + return false; + } + } + } + } else { + return false; + } + + // Finally, if we do not reduce the input size, we bail out + if (RexLiteral.intValue(sortLimit.fetch) + >= RelMetadataQuery.getRowCount(reducedInput)) { + return false; + } + + return true; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + final HiveJoin join = call.rel(1); + RelNode inputLeft = join.getLeft(); + RelNode inputRight = join.getRight(); + + // We create a new sort operator on the corresponding input + if (join.getJoinType() == JoinRelType.LEFT) { + inputLeft = sortLimit.copy(sortLimit.getTraitSet(), inputLeft, + sortLimit.getCollation(), sortLimit.offset, sortLimit.fetch); + ((HiveSortLimit) inputLeft).setRuleCreated(true); + } else { + // Adjust right collation + final RelCollation rightCollation = + RelCollationTraitDef.INSTANCE.canonize( + RelCollations.shift(sortLimit.getCollation(), + -join.getLeft().getRowType().getFieldCount())); + inputRight = sortLimit.copy(sortLimit.getTraitSet().replace(rightCollation), inputRight, + rightCollation, sortLimit.offset, sortLimit.fetch); + ((HiveSortLimit) inputRight).setRuleCreated(true); + } + // We copy the join and the top sort operator + RelNode result = join.copy(join.getTraitSet(), join.getCondition(), inputLeft, + inputRight, join.getJoinType(), join.isSemiJoinDone()); + result = sortLimit.copy(sortLimit.getTraitSet(), result, sortLimit.getCollation(), + sortLimit.offset, sortLimit.fetch); + + call.transformTo(result); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java new file mode 100644 index 0000000..ebb20c2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortMergeRule.java @@ -0,0 +1,78 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rex.RexLiteral; +import org.apache.calcite.rex.RexNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +/** + * This rule will merge two HiveSortLimit operators. + * + * It is applied when the top match is a pure limit operation (no sorting). + * + * If the bottom operator is not synthetic and does not contain a limit, + * we currently bail out. Thus, we avoid a lot of unnecessary limit operations + * in the middle of the execution plan that could create performance regressions. + */ +public class HiveSortMergeRule extends RelOptRule { + + public static final HiveSortMergeRule INSTANCE = + new HiveSortMergeRule(); + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveSortProjectTransposeRule. + */ + private HiveSortMergeRule() { + super( + operand( + HiveSortLimit.class, + operand(HiveSortLimit.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit topSortLimit = call.rel(0); + final HiveSortLimit bottomSortLimit = call.rel(1); + + // If top operator is not a pure limit, we bail out + if (!HiveCalciteUtil.pureLimitRelNode(topSortLimit)) { + return false; + } + + // If the bottom operator is not synthetic and it does not contain a limit, + // we will bail out; we do not want to end up with limits all over the tree + if (topSortLimit.isRuleCreated() && !bottomSortLimit.isRuleCreated() && + bottomSortLimit.fetch == null) { + return false; + } + + return true; + } + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit topSortLimit = call.rel(0); + final HiveSortLimit bottomSortLimit = call.rel(1); + + // Lowest limit + final RexNode newLimit; + if (bottomSortLimit.fetch != null && RexLiteral.intValue(topSortLimit.fetch) + >= RexLiteral.intValue(bottomSortLimit.fetch)) { + newLimit = bottomSortLimit.fetch; + } else { + newLimit = topSortLimit.fetch; + } + + final HiveSortLimit newSort = bottomSortLimit.copy(bottomSortLimit.getTraitSet(), + bottomSortLimit.getInput(), bottomSortLimit.collation, null, newLimit); + + call.transformTo(newSort); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java new file mode 100644 index 0000000..debfbe0 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortProjectTransposeRule.java @@ -0,0 +1,79 @@ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.rel.RelCollation; +import org.apache.calcite.rel.RelCollationTraitDef; +import org.apache.calcite.rel.RelFieldCollation; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.util.mapping.Mappings; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +import com.google.common.collect.ImmutableList; + +public class HiveSortProjectTransposeRule extends RelOptRule { + + public static final HiveSortProjectTransposeRule INSTANCE = + new HiveSortProjectTransposeRule(); + + //~ Constructors ----------------------------------------------------------- + + /** + * Creates a HiveSortProjectTransposeRule. + */ + private HiveSortProjectTransposeRule() { + super( + operand( + HiveSortLimit.class, + operand(HiveProject.class, any()))); + } + + //~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + + // If does not contain a limit operation, we bail out + if (!HiveCalciteUtil.limitRelNode(sortLimit)) { + return false; + } + + return true; + } + + // implement RelOptRule + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit sort = call.rel(0); + final HiveProject project = call.rel(1); + + // Determine mapping between project input and output fields. If sort + // relies on non-trivial expressions, we can't push. + final Mappings.TargetMapping map = + RelOptUtil.permutation( + project.getProjects(), project.getInput().getRowType()); + for (RelFieldCollation fc : sort.getCollation().getFieldCollations()) { + if (map.getTargetOpt(fc.getFieldIndex()) < 0) { + return; + } + } + + // Create new collation + final RelCollation newCollation = + RelCollationTraitDef.INSTANCE.canonize( + RexUtil.apply(map, sort.getCollation())); + + // New operators + final HiveSortLimit newSort = sort.copy(sort.getTraitSet().replace(newCollation), + project.getInput(), newCollation, sort.offset, sort.fetch); + final RelNode newProject = project.copy(sort.getTraitSet(), + ImmutableList.of(newSort)); + + call.transformTo(newProject); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java new file mode 100644 index 0000000..618c717 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSortRemoveRule.java @@ -0,0 +1,79 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptRuleOperand; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rex.RexLiteral; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; + +/** + * Planner rule that removes + * a {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit}. + */ +public class HiveSortRemoveRule extends RelOptRule { + + protected final float reductionProportion; + protected final float reductionTuples; + + //~ Constructors ----------------------------------------------------------- + + public HiveSortRemoveRule(float reductionProportion, long reductionTuples) { + this(operand(HiveSortLimit.class, any()), reductionProportion, reductionTuples); + } + + private HiveSortRemoveRule(RelOptRuleOperand operand, float reductionProportion, + long reductionTuples) { + super(operand); + this.reductionProportion = reductionProportion; + this.reductionTuples = reductionTuples; + } + + //~ Methods ---------------------------------------------------------------- + + @Override + public boolean matches(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + + // If it is not created by HiveSortJoinReduceRule, we cannot remove it + if (!sortLimit.isRuleCreated()) { + return false; + } + + // Finally, if we do not reduce the size input enough, we bail out + int limit = RexLiteral.intValue(sortLimit.fetch); + Double rowCount = RelMetadataQuery.getRowCount(sortLimit.getInput()); + if (rowCount != null && limit <= reductionProportion * rowCount && + rowCount - limit >= reductionTuples) { + return false; + } + + return true; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HiveSortLimit sortLimit = call.rel(0); + + // We remove the limit operator + call.transformTo(sortLimit.getInput()); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java index 728c5aa..caf8978 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdRowCount.java @@ -30,6 +30,7 @@ import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; import org.apache.calcite.rel.core.SemiJoin; +import org.apache.calcite.rel.core.Sort; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdRowCount; @@ -38,6 +39,7 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.sql.fun.SqlStdOperatorTable; @@ -87,6 +89,22 @@ public Double getRowCount(SemiJoin rel) { return super.getRowCount(rel); } + @Override + public Double getRowCount(Sort rel) { + final Double rowCount = RelMetadataQuery.getRowCount(rel.getInput()); + if (rowCount != null && rel.fetch != null) { + final int offset = rel.offset == null ? 0 : RexLiteral.intValue(rel.offset); + final int limit = RexLiteral.intValue(rel.fetch); + final Double offsetLimit = new Double(offset + limit); + // offsetLimit is smaller than rowCount of the input operator + // thus, we return the offsetLimit + if (offsetLimit < rowCount) { + return offsetLimit; + } + } + return rowCount; + } + static class PKFKRelationInfo { public final int fkSide; public final double ndvScalingFactor; diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java index 715f24f..a0eb83d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdSelectivity.java @@ -61,6 +61,16 @@ public Double getSelectivity(HiveTableScan t, RexNode predicate) { public Double getSelectivity(HiveJoin j, RexNode predicate) throws CalciteSemanticException { if (j.getJoinType().equals(JoinRelType.INNER)) { return computeInnerJoinSelectivity(j, predicate); + } else if (j.getJoinType().equals(JoinRelType.LEFT) || + j.getJoinType().equals(JoinRelType.RIGHT)) { + double left = RelMetadataQuery.getRowCount(j.getLeft()); + double right = RelMetadataQuery.getRowCount(j.getRight()); + double product = left * right; + double innerJoinSelectivity = computeInnerJoinSelectivity(j, predicate); + if (j.getJoinType().equals(JoinRelType.LEFT)) { + return Math.max(innerJoinSelectivity, left/product); + } + return Math.max(innerJoinSelectivity, right/product); } return 1.0; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index de67b54..bfe4633 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -148,8 +148,13 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectSortTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -953,16 +958,31 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, HiveExpandDistinctAggregatesRule.INSTANCE); } - // 1. Push Down Semi Joins + // 1. Push down limit through outer join + if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE)) { + // This should be a cost based decision, but till we enable the extended cost + // model, we will use the given value for the variable + final float reductionProportion = HiveConf.getFloatVar(conf, + HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_PERCENTAGE); + final long reductionTuples = HiveConf.getLongVar(conf, + HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_JOIN_TRANSPOSE_REDUCTION_TUPLES); + basePlan = hepPlan(basePlan, true, mdProvider, HiveSortMergeRule.INSTANCE, + HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE); + basePlan = hepPlan(basePlan, true, mdProvider, HepMatchOrder.BOTTOM_UP, + new HiveSortRemoveRule(reductionProportion, reductionTuples), + HiveProjectSortTransposeRule.INSTANCE); + } + + // 2. Push Down Semi Joins basePlan = hepPlan(basePlan, true, mdProvider, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); - // 2. Add not null filters + // 3. Add not null filters if (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { basePlan = hepPlan(basePlan, true, mdProvider, HiveJoinAddNotNullRule.INSTANCE); } - // 3. Constant propagation, common filter extraction, and PPD + // 4. Constant propagation, common filter extraction, and PPD basePlan = hepPlan(basePlan, true, mdProvider, ReduceExpressionsRule.PROJECT_INSTANCE, ReduceExpressionsRule.FILTER_INSTANCE, @@ -976,12 +996,12 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv new FilterAggregateTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); - // 4. Transitive inference & Partition Pruning + // 5. Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); - // 5. Projection Pruning + // 6. Projection Pruning HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, cluster, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY, HiveJoin.HIVE_JOIN_FACTORY, @@ -989,7 +1009,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY); basePlan = fieldTrimmer.trim(basePlan); - // 6. Rerun PPD through Project as column pruning would have introduced DT + // 7. Rerun PPD through Project as column pruning would have introduced DT // above scans basePlan = hepPlan(basePlan, true, mdProvider, new FilterProjectTransposeRule(Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, diff --git ql/src/test/queries/clientpositive/limit_join_transpose.q ql/src/test/queries/clientpositive/limit_join_transpose.q new file mode 100644 index 0000000..bfc2378 --- /dev/null +++ ql/src/test/queries/clientpositive/limit_join_transpose.q @@ -0,0 +1,99 @@ +set hive.optimize.limitjointranspose=false; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + + +set hive.optimize.limitjointranspose=true; +set hive.optimize.limitjointranspose.reductionpercentage=0.0001f; +set hive.optimize.limitjointranspose.reductiontuples=10; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + + +set hive.optimize.limitjointranspose.reductionpercentage=0.1f; +set hive.optimize.limitjointranspose.reductiontuples=10; + +explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1; + +explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +set hive.optimize.limitjointranspose.reductionpercentage=1f; +set hive.optimize.limitjointranspose.reductiontuples=0; + +explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1; + +explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1; + +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1; diff --git ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q new file mode 100644 index 0000000..1994b40 --- /dev/null +++ ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -0,0 +1,24 @@ +set hive.optimize.limitjointranspose=true; +set hive.optimize.limitjointranspose.reductionpercentage=0.1f; +set hive.optimize.limitjointranspose.reductiontuples=100; +set hive.explain.user=false; +set hive.auto.convert.join=false; +set hive.optimize.dynamic.partition.hashjoin=false; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1; + + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=true; +set hive.auto.convert.join.noconditionaltask.size=200000; +set hive.exec.reducers.bytes.per.reducer=200000; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1; diff --git ql/src/test/results/clientpositive/join32_lessSize.q.out ql/src/test/results/clientpositive/join32_lessSize.q.out index 5ea4024..8e71710 100644 --- ql/src/test/results/clientpositive/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/join32_lessSize.q.out @@ -1920,22 +1920,22 @@ TOK_QUERY STAGE DEPENDENCIES: - Stage-9 is a root stage - Stage-7 depends on stages: Stage-9 - Stage-8 depends on stages: Stage-7 + Stage-8 is a root stage Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-6 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-0 STAGE PLANS: - Stage: Stage-9 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_0:$hdt$_0:y + $hdt$_1:$hdt$_1:y Fetch Operator limit: -1 Alias -> Map Local Operator Tree: - $hdt$_0:$hdt$_0:y + $hdt$_1:$hdt$_1:y TableScan alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -1950,7 +1950,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 1 - Stage: Stage-7 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -2089,12 +2089,12 @@ STAGE PLANS: name: default.src1 name: default.src1 Truncated Path -> Alias: - /src1 [$hdt$_0:$hdt$_1:x] + /src1 [$hdt$_1:$hdt$_2:x] - Stage: Stage-8 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: - $hdt$_1:z + $hdt$_0:z Fetch Operator limit: -1 Partition Description: @@ -2144,7 +2144,7 @@ STAGE PLANS: name: default.srcpart name: default.srcpart Alias -> Map Local Operator Tree: - $hdt$_1:z + $hdt$_0:z TableScan alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2159,11 +2159,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 - Stage: Stage-6 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -2172,13 +2172,13 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - Position of Big Table: 0 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col4 + Position of Big Table: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2313,7 +2313,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 - Stage: Stage-3 + Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/join_merging.q.out ql/src/test/results/clientpositive/join_merging.q.out index b42bdc5..9ab9587 100644 --- ql/src/test/results/clientpositive/join_merging.q.out +++ ql/src/test/results/clientpositive/join_merging.q.out @@ -97,12 +97,12 @@ from part p1 left outer join part p2 on p1.p_partkey = p2.p_partkey p1.p_size > 10 and p1.p_size > p2.p_size + 10 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan @@ -153,17 +153,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col2 (type: int) - sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col3 (type: int) - TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -175,17 +168,24 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col3 (type: int) Reduce Operator Tree: Join Operator condition map: - Right Outer Join0 to 1 + Left Outer Join0 to 1 keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 + 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col3 (type: int) + expressions: _col2 (type: int), _col4 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/limit_join_transpose.q.out ql/src/test/results/clientpositive/limit_join_transpose.q.out new file mode 100644 index 0000000..8717154 --- /dev/null +++ ql/src/test/results/clientpositive/limit_join_transpose.q.out @@ -0,0 +1,869 @@ +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 +PREHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 left outer join src src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 238 val_238 +PREHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col4 (type: string), _col5 (type: string), _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 238 val_238 238 val_238 +PREHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 right outer join ( + select * + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 val_238 238 val_238 238 val_238 +PREHOOK: query: explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string) + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src src1 right outer join ( + select src2.key, src2.value + from src src2 left outer join src src3 + on src2.value = src3.value) src2 +on src1.key = src2.key +order by src2.key +limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 0 val_0 diff --git ql/src/test/results/clientpositive/spark/join32_lessSize.q.out ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index 937e8fc..e2ae796 100644 --- ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -1767,18 +1767,22 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: y + alias: z Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - Position of Big Table: 1 + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col1 (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -1786,9 +1790,12 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: src + base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1796,11 +1803,13 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src + name default.srcpart numFiles 1 numRows 500 + partition_columns ds/hr + partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct src { string key, string value} + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1810,45 +1819,38 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.src - numFiles 1 - numRows 500 - rawDataSize 5312 - serialization.ddl struct src { string key, string value} + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src - name: default.src + name: default.srcpart + name: default.srcpart Truncated Path -> Alias: - /src [y] - Map 3 + /srcpart/ds=2008-04-08/hr=11 [z] + Map 2 Map Operator Tree: TableScan - alias: z + alias: y Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false - Filter Operator - isSamplingPred: false - predicate: value is not null (type: boolean) - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Spark HashTable Sink Operator - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Position of Big Table: 0 + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Position of Big Table: 1 Local Work: Map Reduce Local Work Path -> Alias: @@ -1856,12 +1858,9 @@ STAGE PLANS: Path -> Partition: #### A masked pattern was here #### Partition - base file name: hr=11 + base file name: src input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - partition values: - ds 2008-04-08 - hr 11 properties: COLUMN_STATS_ACCURATE true bucket_count -1 @@ -1869,13 +1868,11 @@ STAGE PLANS: columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart + name default.src numFiles 1 numRows 500 - partition_columns ds/hr - partition_columns.types string:string rawDataSize 5312 - serialization.ddl struct srcpart { string key, string value} + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 5812 @@ -1885,29 +1882,32 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE true bucket_count -1 columns key,value columns.comments 'default','default' columns.types string:string #### A masked pattern was here #### - name default.srcpart - partition_columns ds/hr - partition_columns.types string:string - serialization.ddl struct srcpart { string key, string value} + name default.src + numFiles 1 + numRows 500 + rawDataSize 5312 + serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.srcpart - name: default.srcpart + name: default.src + name: default.src Truncated Path -> Alias: - /srcpart/ds=2008-04-08/hr=11 [z] + /src [y] Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -1929,7 +1929,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 0 Map 1 + 0 Map 2 Position of Big Table: 1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1940,15 +1940,15 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 + 0 _col0 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col3, _col4 input vertices: - 1 Map 3 - Position of Big Table: 0 + 0 Map 1 + Position of Big Table: 1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col2 (type: string), _col1 (type: string) + expressions: _col3 (type: string), _col0 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/spark/join_merging.q.out ql/src/test/results/clientpositive/spark/join_merging.q.out index 6c5089b..baed227 100644 --- ql/src/test/results/clientpositive/spark/join_merging.q.out +++ ql/src/test/results/clientpositive/spark/join_merging.q.out @@ -114,8 +114,8 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Map 3 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Reducer 4 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -123,6 +123,20 @@ STAGE PLANS: TableScan alias: p1 Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: p_partkey (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: p1 + Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (p_size > 10) (type: boolean) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE @@ -136,7 +150,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 8 Data size: 968 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 4 + Map 5 Map Operator Tree: TableScan alias: p1 @@ -151,20 +165,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) - Map 5 - Map Operator Tree: - TableScan - alias: p1 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -172,6 +172,27 @@ STAGE PLANS: Left Outer Join0 to 1 keys: 0 _col0 (type: int) + 1 _col2 (type: int) + outputColumnNames: _col2, _col4 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col4 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE @@ -184,27 +205,6 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col3 (type: int) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Right Outer Join0 to 1 - keys: - 0 _col2 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1, _col3 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 28 Data size: 3461 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/subquery_notin.q.out ql/src/test/results/clientpositive/subquery_notin.q.out index 5563794..12bd35e 100644 --- ql/src/test/results/clientpositive/subquery_notin.q.out +++ ql/src/test/results/clientpositive/subquery_notin.q.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr explain select * @@ -18,64 +18,70 @@ where src.key not in ) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > '2') and key is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string) + Reduce Output Operator + sort order: + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan Reduce Output Operator sort order: @@ -88,62 +94,52 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (key > '2') (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -151,7 +147,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select * from src where src.key not in ( select key from src s1 where s1.key > '2') @@ -1243,7 +1239,7 @@ Manufacturer#5 almond antique medium spring khaki 6 Manufacturer#5 almond azure blanched chiffon midnight 23 Manufacturer#5 almond antique blue firebrick mint 31 Manufacturer#5 almond aquamarine dodger light gainsboro 46 -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr, Group By in Parent Query select li.l_partkey, count(*) from lineitem li @@ -1278,7 +1274,7 @@ POSTHOOK: Input: default@lineitem 139636 1 175839 1 182052 1 -Warning: Shuffle Join JOIN[20][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: -- alternate not in syntax select * from src @@ -1442,7 +1438,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@t1_v POSTHOOK: Output: database:default POSTHOOK: Output: default@T2_v -Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) @@ -1452,50 +1448,12 @@ select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-3 is a root stage - Stage-1 depends on stages: Stage-3 - Stage-2 depends on stages: Stage-1 + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage Stage-0 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE (key) END is null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-1 Map Reduce Map Operator Tree: @@ -1510,9 +1468,52 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col1 is null (type: boolean) + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) TableScan Reduce Output Operator sort order: @@ -1525,61 +1526,52 @@ STAGE PLANS: 0 1 outputColumnNames: _col0 - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 100 Data size: 1065 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE - TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key < '11') and (CASE WHEN ((key > '104')) THEN (null) ELSE (key) END < '11')) (type: boolean) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + predicate: ((key < '11') and CASE WHEN ((key > '104')) THEN (null) ELSE (key) END is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: CASE WHEN ((key > '104')) THEN (null) ELSE (key) END (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Reduce Operator Tree: - Join Operator - condition map: - Left Outer Join0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 200 Data size: 2132 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col2 is null (type: boolean) - Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 1066 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator @@ -1587,7 +1579,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select * from T1_v where T1_v.key not in (select T2_v.key from T2_v) PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out index 9689ae3..150b527 100644 --- ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out +++ ql/src/test/results/clientpositive/subquery_notin_having.q.java1.7.out @@ -1,4 +1,4 @@ -Warning: Shuffle Join JOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-3:MAPRED' is a cross product PREHOOK: query: -- non agg, non corr -- JAVA_VERSION_SPECIFIC_OUTPUT @@ -25,9 +25,9 @@ having key not in POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-4 - Stage-3 depends on stages: Stage-2 - Stage-4 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2, Stage-5 + Stage-5 is a root stage Stage-0 depends on stages: Stage-3 STAGE PLANS: @@ -72,38 +72,10 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: bigint) - TableScan - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) TableScan alias: src @@ -127,24 +99,48 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: _col3 is null (type: boolean) - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 151 Data size: 1606 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + predicate: _col2 is null (type: boolean) + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 1600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git ql/src/test/results/clientpositive/tez/explainuser_1.q.out ql/src/test/results/clientpositive/tez/explainuser_1.q.out index fa29dfe..871fa4e 100644 --- ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -3732,108 +3732,105 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) +Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) -Reducer 6 <- Map 5 (SIMPLE_EDGE) +Reducer 7 <- Map 6 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:-1 Stage-1 Reducer 4 - File Output Operator [FS_28] + File Output Operator [FS_29] compressed:false - Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"} - Select Operator [SEL_27] + Select Operator [SEL_28] | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + | Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE |<-Reducer 3 [SIMPLE_EDGE] - Reduce Output Operator [RS_26] + Reduce Output Operator [RS_27] key expressions:_col0 (type: string) sort order:+ - Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE + Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE value expressions:_col1 (type: string) - Select Operator [SEL_25] - outputColumnNames:["_col0","_col1"] - Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE - Filter Operator [FIL_31] - predicate:_col3 is null (type: boolean) - Statistics:Num rows: 302 Data size: 53756 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator [MERGEJOIN_36] - | condition map:[{"":"Left Outer Join0 to 1"}] - | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} - | outputColumnNames:["_col0","_col1","_col3"] - | Statistics:Num rows: 605 Data size: 107690 Basic stats: COMPLETE Column stats: NONE - |<-Map 7 [SIMPLE_EDGE] - | Reduce Output Operator [RS_22] - | key expressions:_col0 (type: string) - | Map-reduce partition columns:_col0 (type: string) - | sort order:+ - | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - | Select Operator [SEL_16] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - | Filter Operator [FIL_34] - | predicate:(key > '2') (type: boolean) - | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_14] - | alias:src_cbo - | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 2 [SIMPLE_EDGE] - Reduce Output Operator [RS_21] - key expressions:_col0 (type: string) - Map-reduce partition columns:_col0 (type: string) - sort order:+ - Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE - value expressions:_col1 (type: string) - Merge Join Operator [MERGEJOIN_35] - | condition map:[{"":"Inner Join 0 to 1"}] - | keys:{} - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE - |<-Map 1 [SIMPLE_EDGE] - | Reduce Output Operator [RS_18] - | sort order: - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - | value expressions:_col0 (type: string), _col1 (type: string) - | Select Operator [SEL_2] - | outputColumnNames:["_col0","_col1"] - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - | TableScan [TS_0] - | alias:src_cbo - | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - |<-Reducer 6 [SIMPLE_EDGE] - Reduce Output Operator [RS_19] + Merge Join Operator [MERGEJOIN_37] + | condition map:[{"":"Inner Join 0 to 1"}] + | keys:{} + | outputColumnNames:["_col0","_col1"] + | Statistics:Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: NONE + |<-Reducer 2 [SIMPLE_EDGE] + | Reduce Output Operator [RS_21] + | sort order: + | Statistics:Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + | value expressions:_col0 (type: string), _col1 (type: string) + | Filter Operator [FIL_32] + | predicate:_col2 is null (type: boolean) + | Statistics:Num rows: 1 Data size: 265 Basic stats: COMPLETE Column stats: COMPLETE + | Merge Join Operator [MERGEJOIN_36] + | | condition map:[{"":"Left Outer Join0 to 1"}] + | | keys:{"0":"_col0 (type: string)","1":"_col0 (type: string)"} + | | outputColumnNames:["_col0","_col1","_col2"] + | | Statistics:Num rows: 404 Data size: 107060 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 1 [SIMPLE_EDGE] + | | Reduce Output Operator [RS_18] + | | key expressions:_col0 (type: string) + | | Map-reduce partition columns:_col0 (type: string) + | | sort order:+ + | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | | value expressions:_col1 (type: string) + | | Select Operator [SEL_2] + | | outputColumnNames:["_col0","_col1"] + | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | | TableScan [TS_0] + | | alias:src_cbo + | | Statistics:Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + | |<-Map 5 [SIMPLE_EDGE] + | Reduce Output Operator [RS_19] + | key expressions:_col0 (type: string) + | Map-reduce partition columns:_col0 (type: string) + | sort order:+ + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | Select Operator [SEL_5] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | Filter Operator [FIL_33] + | predicate:(key > '2') (type: boolean) + | Statistics:Num rows: 166 Data size: 14442 Basic stats: COMPLETE Column stats: COMPLETE + | TableScan [TS_3] + | alias:src_cbo + | Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + |<-Reducer 7 [SIMPLE_EDGE] + Reduce Output Operator [RS_22] + sort order: + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator [SEL_14] + Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator [FIL_34] + predicate:(_col0 = 0) (type: boolean) + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator [GBY_13] + | aggregations:["count(VALUE._col0)"] + | outputColumnNames:["_col0"] + | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + |<-Map 6 [SIMPLE_EDGE] + Reduce Output Operator [RS_12] sort order: - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Select Operator [SEL_11] - Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE - Filter Operator [FIL_32] - predicate:(_col0 = 0) (type: boolean) - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator [GBY_10] - | aggregations:["count(VALUE._col0)"] - | outputColumnNames:["_col0"] - | Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - |<-Map 5 [SIMPLE_EDGE] - Reduce Output Operator [RS_9] - sort order: - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - value expressions:_col0 (type: bigint) - Group By Operator [GBY_8] - aggregations:["count()"] - outputColumnNames:["_col0"] - Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator [SEL_5] - Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator [FIL_33] - predicate:((key > '2') and key is null) (type: boolean) - Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE - TableScan [TS_3] - alias:src_cbo - Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions:_col0 (type: bigint) + Group By Operator [GBY_11] + aggregations:["count()"] + outputColumnNames:["_col0"] + Statistics:Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator [SEL_8] + Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator [FIL_35] + predicate:((key > '2') and key is null) (type: boolean) + Statistics:Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + TableScan [TS_6] + alias:src_cbo + Statistics:Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE PREHOOK: query: explain select p_mfgr, b.p_name, p_size from part b diff --git ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out new file mode 100644 index 0000000..52b4288 --- /dev/null +++ ql/src/test/results/clientpositive/tez/tez_dynpart_hashjoin_3.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 3 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 2 + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {(KEY.reducesinkkey0 < 100)} + keys: + 0 UDFToInteger(_col0) (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col2 (type: int) + sort order: + + Map-reduce partition columns: _col2 (type: int) + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) and (a.cint < 100) +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 3 <- Map 1 (BROADCAST_EDGE), Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 215 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join0 to 1 + filter predicates: + 0 + 1 {(_col2 < 100)} + keys: + 0 UDFToInteger(_col0) (type: int) + 1 _col2 (type: int) + outputColumnNames: _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 + input vertices: + 0 Map 1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + HybridGraceHashJoin: true + Select Operator + expressions: _col1 (type: tinyint), _col2 (type: smallint), _col3 (type: int), _col4 (type: bigint), _col5 (type: float), _col6 (type: double), _col7 (type: string), _col8 (type: string), _col9 (type: timestamp), _col10 (type: timestamp), _col11 (type: boolean), _col12 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink +