diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java index 87fc1b9..b1eca7d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexUtil.java @@ -411,8 +411,10 @@ public static RexNode simplifyAnd(RexBuilder rexBuilder, RexCall e, public static RexNode simplifyAnd2(RexBuilder rexBuilder, List terms, List notTerms) { - if (terms.contains(rexBuilder.makeLiteral(false))) { - return rexBuilder.makeLiteral(false); + for (RexNode term : terms) { + if (term.isAlwaysFalse()) { + return rexBuilder.makeLiteral(false); + } } if (terms.isEmpty() && notTerms.isEmpty()) { return rexBuilder.makeLiteral(true); @@ -447,8 +449,10 @@ public static RexNode simplifyAnd2(RexBuilder rexBuilder, * UNKNOWN it will be interpreted as FALSE. */ public static RexNode simplifyAnd2ForUnknownAsFalse(RexBuilder rexBuilder, List terms, List notTerms) { - if (terms.contains(rexBuilder.makeLiteral(false))) { - return rexBuilder.makeLiteral(false); + for (RexNode term : terms) { + if (term.isAlwaysFalse()) { + return rexBuilder.makeLiteral(false); + } } if (terms.isEmpty() && notTerms.isEmpty()) { return rexBuilder.makeLiteral(true); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java index 9cec6ca..09e0fc1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/HiveRelMdPredicates.java @@ -18,16 +18,27 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.stats; import java.util.ArrayList; +import java.util.BitSet; +import java.util.HashSet; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Set; +import java.util.SortedMap; +import org.apache.calcite.linq4j.Linq4j; import org.apache.calcite.linq4j.Ord; +import org.apache.calcite.linq4j.function.Predicate1; import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Aggregate; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.core.Project; +import org.apache.calcite.rel.core.SemiJoin; import org.apache.calcite.rel.core.Union; import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMdPredicates; @@ -40,7 +51,10 @@ import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexPermuteInputsShuttle; import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.util.BitSets; import org.apache.calcite.util.BuiltInMethod; import org.apache.calcite.util.ImmutableBitSet; import org.apache.calcite.util.mapping.Mapping; @@ -48,8 +62,13 @@ import org.apache.calcite.util.mapping.Mappings; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import com.google.common.base.Function; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Iterables; +import 
com.google.common.collect.Iterators; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; //TODO: Move this to calcite @@ -131,6 +150,62 @@ public RelOptPredicateList getPredicates(Project project, RelMetadataQuery mq) { return RelOptPredicateList.of(projectPullUpPredicates); } + /** Infers predicates for a {@link org.apache.calcite.rel.core.Join}. */ + @Override + public RelOptPredicateList getPredicates(Join join, RelMetadataQuery mq) { + RexBuilder rB = join.getCluster().getRexBuilder(); + RelNode left = join.getInput(0); + RelNode right = join.getInput(1); + + final RelOptPredicateList leftInfo = mq.getPulledUpPredicates(left); + final RelOptPredicateList rightInfo = mq.getPulledUpPredicates(right); + + JoinConditionBasedPredicateInference jI = + new JoinConditionBasedPredicateInference(join, + RexUtil.composeConjunction(rB, leftInfo.pulledUpPredicates, false), + RexUtil.composeConjunction(rB, rightInfo.pulledUpPredicates, + false)); + + return jI.inferPredicates(false); + } + + /** + * Infers predicates for an Aggregate. + * + *
+   * <p>Pulls up predicates that only contain references to columns in the
+   * GroupSet. For example:
+   *
+   * <blockquote><pre>
+   * inputPullUpExprs : { a > 7, b + c < 10, a + e = 9}
+   * groupSet         : { a, b}
+   * pulledUpExprs    : { a > 7}
+   * </pre></blockquote>
+ */ + @Override + public RelOptPredicateList getPredicates(Aggregate agg, RelMetadataQuery mq) { + final RelNode input = agg.getInput(); + final RelOptPredicateList inputInfo = mq.getPulledUpPredicates(input); + final List aggPullUpPredicates = new ArrayList<>(); + + ImmutableBitSet groupKeys = agg.getGroupSet(); + Mapping m = Mappings.create(MappingType.PARTIAL_FUNCTION, + input.getRowType().getFieldCount(), agg.getRowType().getFieldCount()); + + int i = 0; + for (int j : groupKeys) { + m.set(j, i++); + } + + for (RexNode r : inputInfo.pulledUpPredicates) { + ImmutableBitSet rCols = RelOptUtil.InputFinder.bits(r); + if (!rCols.isEmpty() && groupKeys.contains(rCols)) { + r = r.accept(new RexPermuteInputsShuttle(m, input)); + aggPullUpPredicates.add(r); + } + } + return RelOptPredicateList.of(aggPullUpPredicates); + } + /** * Infers predicates for a Union. */ @@ -176,4 +251,433 @@ public RelOptPredicateList getPredicates(Union union, RelMetadataQuery mq) { return RelOptPredicateList.of(preds); } + /** + * Utility to infer predicates from one side of the join that apply on the + * other side. + * + *
+   * <p>Contract is:
+   * <ul>
+   * <li>initialize with a {@link org.apache.calcite.rel.core.Join} and
+   * optional predicates applicable on its left and right subtrees.
+   * <li>you can then ask it for equivalentPredicate(s) given a predicate.
+   * </ul>
+   *
+   * <p>So for:
+   * <ol>
+   * <li>'R1(x) join R2(y) on x = y' a call for equivalentPredicates
+   * on 'x > 7' will return '[y > 7]'
+   * <li>'R1(x) join R2(y) on x = y join R3(z) on y = z' a call for
+   * equivalentPredicates on the second join 'x > 7' will return
+   * </ol>
+ */ + static class JoinConditionBasedPredicateInference { + final Join joinRel; + final boolean isSemiJoin; + final int nSysFields; + final int nFieldsLeft; + final int nFieldsRight; + final ImmutableBitSet leftFieldsBitSet; + final ImmutableBitSet rightFieldsBitSet; + final ImmutableBitSet allFieldsBitSet; + SortedMap equivalence; + final Map exprFields; + final Set allExprsDigests; + final Set equalityPredicates; + final RexNode leftChildPredicates; + final RexNode rightChildPredicates; + + public JoinConditionBasedPredicateInference(Join joinRel, + RexNode lPreds, RexNode rPreds) { + this(joinRel, joinRel instanceof SemiJoin, lPreds, rPreds); + } + + private JoinConditionBasedPredicateInference(Join joinRel, boolean isSemiJoin, + RexNode lPreds, RexNode rPreds) { + super(); + this.joinRel = joinRel; + this.isSemiJoin = isSemiJoin; + nFieldsLeft = joinRel.getLeft().getRowType().getFieldList().size(); + nFieldsRight = joinRel.getRight().getRowType().getFieldList().size(); + nSysFields = joinRel.getSystemFieldList().size(); + leftFieldsBitSet = ImmutableBitSet.range(nSysFields, + nSysFields + nFieldsLeft); + rightFieldsBitSet = ImmutableBitSet.range(nSysFields + nFieldsLeft, + nSysFields + nFieldsLeft + nFieldsRight); + allFieldsBitSet = ImmutableBitSet.range(0, + nSysFields + nFieldsLeft + nFieldsRight); + + exprFields = Maps.newHashMap(); + allExprsDigests = new HashSet<>(); + + if (lPreds == null) { + leftChildPredicates = null; + } else { + Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft, nSysFields, 0, nFieldsLeft); + leftChildPredicates = lPreds.accept( + new RexPermuteInputsShuttle(leftMapping, joinRel.getInput(0))); + + for (RexNode r : RelOptUtil.conjunctions(leftChildPredicates)) { + exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); + allExprsDigests.add(r.toString()); + } + } + if (rPreds == null) { + rightChildPredicates = null; + } else { + Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft + nFieldsRight, + nSysFields + nFieldsLeft, 0, nFieldsRight); + rightChildPredicates = rPreds.accept( + new RexPermuteInputsShuttle(rightMapping, joinRel.getInput(1))); + + for (RexNode r : RelOptUtil.conjunctions(rightChildPredicates)) { + exprFields.put(r.toString(), RelOptUtil.InputFinder.bits(r)); + allExprsDigests.add(r.toString()); + } + } + + equivalence = Maps.newTreeMap(); + equalityPredicates = new HashSet<>(); + for (int i = 0; i < nSysFields + nFieldsLeft + nFieldsRight; i++) { + equivalence.put(i, BitSets.of(i)); + } + + // Only process equivalences found in the join conditions. Processing + // Equivalences from the left or right side infer predicates that are + // already present in the Tree below the join. + RexBuilder rexBuilder = joinRel.getCluster().getRexBuilder(); + List exprs = + RelOptUtil.conjunctions( + compose(rexBuilder, ImmutableList.of(joinRel.getCondition()))); + + final EquivalenceFinder eF = new EquivalenceFinder(); + new ArrayList<>( + Lists.transform(exprs, + new Function() { + public Void apply(RexNode input) { + return input.accept(eF); + } + })); + + equivalence = BitSets.closure(equivalence); + } + + /** + * The PullUp Strategy is sound but not complete. + *
+     * <ol>
+     * <li>We only pullUp inferred predicates for now. Pulling up existing
+     * predicates causes an explosion of duplicates. The existing predicates are
+     * pushed back down as new predicates. Once we have rules to eliminate
+     * duplicate Filter conditions, we should pullUp all predicates.
+     * <li>For Left Outer: we infer new predicates from the left and set them as
+     * applicable on the Right side. No predicates are pulledUp.
+     * <li>Right Outer Joins are handled in an analogous manner.
+     * <li>For Full Outer Joins no predicates are pulledUp or inferred.
+     * </ol>
+ */ + public RelOptPredicateList inferPredicates( + boolean includeEqualityInference) { + final List inferredPredicates = new ArrayList<>(); + final List nonFieldsPredicates = new ArrayList<>(); + final Set allExprsDigests = new HashSet<>(this.allExprsDigests); + final JoinRelType joinType = joinRel.getJoinType(); + final List leftPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(leftChildPredicates)); + final List rightPreds = ImmutableList.copyOf(RelOptUtil.conjunctions(rightChildPredicates)); + switch (joinType) { + case INNER: + case LEFT: + infer(leftPreds, allExprsDigests, inferredPredicates, + nonFieldsPredicates, includeEqualityInference, + joinType == JoinRelType.LEFT ? rightFieldsBitSet + : allFieldsBitSet); + break; + } + switch (joinType) { + case INNER: + case RIGHT: + infer(rightPreds, allExprsDigests, inferredPredicates, + nonFieldsPredicates, includeEqualityInference, + joinType == JoinRelType.RIGHT ? leftFieldsBitSet + : allFieldsBitSet); + break; + } + + Mappings.TargetMapping rightMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft + nFieldsRight, + 0, nSysFields + nFieldsLeft, nFieldsRight); + final RexPermuteInputsShuttle rightPermute = + new RexPermuteInputsShuttle(rightMapping, joinRel); + Mappings.TargetMapping leftMapping = Mappings.createShiftMapping( + nSysFields + nFieldsLeft, 0, nSysFields, nFieldsLeft); + final RexPermuteInputsShuttle leftPermute = + new RexPermuteInputsShuttle(leftMapping, joinRel); + final List leftInferredPredicates = new ArrayList<>(); + final List rightInferredPredicates = new ArrayList<>(); + + for (RexNode iP : inferredPredicates) { + ImmutableBitSet iPBitSet = RelOptUtil.InputFinder.bits(iP); + if (leftFieldsBitSet.contains(iPBitSet)) { + leftInferredPredicates.add(iP.accept(leftPermute)); + } else if (rightFieldsBitSet.contains(iPBitSet)) { + rightInferredPredicates.add(iP.accept(rightPermute)); + } + } + + if (joinType == JoinRelType.INNER && !nonFieldsPredicates.isEmpty()) { + // Predicates without field references can be pushed to both inputs + final Set leftPredsSet = new HashSet( + Lists.transform(leftPreds, HiveCalciteUtil.REX_STR_FN)); + final Set rightPredsSet = new HashSet( + Lists.transform(rightPreds, HiveCalciteUtil.REX_STR_FN)); + for (RexNode iP : nonFieldsPredicates) { + if (!leftPredsSet.contains(iP.toString())) { + leftInferredPredicates.add(iP); + } + if (!rightPredsSet.contains(iP.toString())) { + rightInferredPredicates.add(iP); + } + } + } + + switch (joinType) { + case INNER: + Iterable pulledUpPredicates; + if (isSemiJoin) { + pulledUpPredicates = Iterables.concat(leftPreds, leftInferredPredicates); + } else { + pulledUpPredicates = Iterables.concat(leftPreds, rightPreds, + RelOptUtil.conjunctions(joinRel.getCondition()), inferredPredicates); + } + return RelOptPredicateList.of( + pulledUpPredicates, leftInferredPredicates, rightInferredPredicates); + case LEFT: + return RelOptPredicateList.of( + leftPreds, EMPTY_LIST, rightInferredPredicates); + case RIGHT: + return RelOptPredicateList.of( + rightPreds, leftInferredPredicates, EMPTY_LIST); + default: + assert inferredPredicates.size() == 0; + return RelOptPredicateList.EMPTY; + } + } + + public RexNode left() { + return leftChildPredicates; + } + + public RexNode right() { + return rightChildPredicates; + } + + private void infer(List predicates, Set allExprsDigests, + List inferedPredicates, List nonFieldsPredicates, + boolean includeEqualityInference, ImmutableBitSet inferringFields) { + for (RexNode r : predicates) { + if 
(!includeEqualityInference + && equalityPredicates.contains(r.toString())) { + continue; + } + Iterable ms = mappings(r); + if (ms.iterator().hasNext()) { + for (Mapping m : ms) { + RexNode tr = r.accept( + new RexPermuteInputsShuttle(m, joinRel.getInput(0), + joinRel.getInput(1))); + if (inferringFields.contains(RelOptUtil.InputFinder.bits(tr)) + && !allExprsDigests.contains(tr.toString()) + && !isAlwaysTrue(tr)) { + inferedPredicates.add(tr); + allExprsDigests.add(tr.toString()); + } + } + } else { + if (!isAlwaysTrue(r)) { + nonFieldsPredicates.add(r); + } + } + } + } + + Iterable mappings(final RexNode predicate) { + return new Iterable() { + public Iterator iterator() { + ImmutableBitSet fields = exprFields.get(predicate.toString()); + if (fields.cardinality() == 0) { + return Iterators.emptyIterator(); + } + return new ExprsItr(fields); + } + }; + } + + private void equivalent(int p1, int p2) { + BitSet b = equivalence.get(p1); + b.set(p2); + + b = equivalence.get(p2); + b.set(p1); + } + + RexNode compose(RexBuilder rexBuilder, Iterable exprs) { + exprs = Linq4j.asEnumerable(exprs).where(new Predicate1() { + public boolean apply(RexNode expr) { + return expr != null; + } + }); + return RexUtil.composeConjunction(rexBuilder, exprs, false); + } + + /** + * Find expressions of the form 'col_x = col_y'. + */ + class EquivalenceFinder extends RexVisitorImpl { + protected EquivalenceFinder() { + super(true); + } + + @Override public Void visitCall(RexCall call) { + if (call.getOperator().getKind() == SqlKind.EQUALS) { + int lPos = pos(call.getOperands().get(0)); + int rPos = pos(call.getOperands().get(1)); + if (lPos != -1 && rPos != -1) { + JoinConditionBasedPredicateInference.this.equivalent(lPos, rPos); + JoinConditionBasedPredicateInference.this.equalityPredicates + .add(call.toString()); + } + } + return null; + } + } + + /** + * Given an expression returns all the possible substitutions. + * + *
+     * <p>For example, for an expression 'a + b + c' and the following
+     * equivalences:
+     *
+     * <blockquote><pre>
+     * a : {a, b}
+     * b : {a, b}
+     * c : {c, e}
+     * </pre></blockquote>
+     *
+     * <p>The following Mappings will be returned:
+     *
+     * <blockquote><pre>
+     * {a → a, b → a, c → c}
+     * {a → a, b → a, c → e}
+     * {a → a, b → b, c → c}
+     * {a → a, b → b, c → e}
+     * {a → b, b → a, c → c}
+     * {a → b, b → a, c → e}
+     * {a → b, b → b, c → c}
+     * {a → b, b → b, c → e}
+     * </pre></blockquote>
+     *
+     * <p>which imply the following inferences:
+     *
+     * <blockquote><pre>
+     * a + a + c
+     * a + a + e
+     * a + b + c
+     * a + b + e
+     * b + a + c
+     * b + a + e
+     * b + b + c
+     * b + b + e
+     * </pre></blockquote>
+ */ + class ExprsItr implements Iterator { + final int[] columns; + final BitSet[] columnSets; + final int[] iterationIdx; + Mapping nextMapping; + boolean firstCall; + + ExprsItr(ImmutableBitSet fields) { + nextMapping = null; + columns = new int[fields.cardinality()]; + columnSets = new BitSet[fields.cardinality()]; + iterationIdx = new int[fields.cardinality()]; + for (int j = 0, i = fields.nextSetBit(0); i >= 0; i = fields + .nextSetBit(i + 1), j++) { + columns[j] = i; + columnSets[j] = equivalence.get(i); + iterationIdx[j] = 0; + } + firstCall = true; + } + + public boolean hasNext() { + if (firstCall) { + initializeMapping(); + firstCall = false; + } else { + computeNextMapping(iterationIdx.length - 1); + } + return nextMapping != null; + } + + public Mapping next() { + return nextMapping; + } + + public void remove() { + throw new UnsupportedOperationException(); + } + + private void computeNextMapping(int level) { + int t = columnSets[level].nextSetBit(iterationIdx[level]); + if (t < 0) { + if (level == 0) { + nextMapping = null; + } else { + iterationIdx[level] = 0; + computeNextMapping(level - 1); + } + } else { + nextMapping.set(columns[level], t); + iterationIdx[level] = t + 1; + } + } + + private void initializeMapping() { + nextMapping = Mappings.create(MappingType.PARTIAL_FUNCTION, + nSysFields + nFieldsLeft + nFieldsRight, + nSysFields + nFieldsLeft + nFieldsRight); + for (int i = 0; i < columnSets.length; i++) { + BitSet c = columnSets[i]; + int t = c.nextSetBit(iterationIdx[i]); + if (t < 0) { + nextMapping = null; + return; + } + nextMapping.set(columns[i], t); + iterationIdx[i] = t + 1; + } + } + } + + private int pos(RexNode expr) { + if (expr instanceof RexInputRef) { + return ((RexInputRef) expr).getIndex(); + } + return -1; + } + + private boolean isAlwaysTrue(RexNode predicate) { + if (predicate instanceof RexCall) { + RexCall c = (RexCall) predicate; + if (c.getOperator().getKind() == SqlKind.EQUALS) { + int lPos = pos(c.getOperands().get(0)); + int rPos = pos(c.getOperands().get(1)); + return lPos != -1 && lPos == rPos; + } + } + return predicate.isAlwaysTrue(); + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 0d4c1bb..e8cd1a0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1117,6 +1117,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // Partition Pruning; otherwise Expression evaluation may try to execute // corelated sub query. + LOG.info("Jesus - Plan0: " + RelOptUtil.toString(basePlan)); + PerfLogger perfLogger = SessionState.getPerfLogger(); final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT); @@ -1146,6 +1148,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); + LOG.info("Jesus - Plan2: " + RelOptUtil.toString(basePlan)); + // 3. 
Run exhaustive PPD, add not null filters, transitive inference, // constant propagation, constant folding List rules = Lists.newArrayList(); @@ -1184,6 +1188,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); + LOG.info("Jesus - Plan3: " + RelOptUtil.toString(basePlan)); + // 4. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or diff --git ql/src/test/results/clientpositive/constprog3.q.out ql/src/test/results/clientpositive/constprog3.q.out index cb440dc..f54168d 100644 --- ql/src/test/results/clientpositive/constprog3.q.out +++ ql/src/test/results/clientpositive/constprog3.q.out @@ -14,7 +14,7 @@ POSTHOOK: query: create temporary table table3(id int, val int, val1 int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@table3 -Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +Warning: Shuffle Join JOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select table1.id, table1.val, table1.val1 from table1 inner join table3 @@ -49,15 +49,15 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) TableScan alias: table3 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Filter Operator - predicate: (id = 1) (type: boolean) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE + Filter Operator + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE Reduce Operator Tree: Join Operator condition map: diff --git ql/src/test/results/clientpositive/infer_join_preds.q.out ql/src/test/results/clientpositive/infer_join_preds.q.out index 38baed3..39d29ba 100644 --- ql/src/test/results/clientpositive/infer_join_preds.q.out +++ ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -158,17 +158,17 @@ STAGE PLANS: alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: false (type: boolean) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: Join Operator @@ -178,10 +178,10 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/mergejoin.q.out ql/src/test/results/clientpositive/mergejoin.q.out index 8dd86de..4b83a2d 100644 --- ql/src/test/results/clientpositive/mergejoin.q.out +++ ql/src/test/results/clientpositive/mergejoin.q.out @@ -2692,7 +2692,6 @@ POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### NULL NULL NULL 98 val_98 2008-04-08 NULL NULL NULL 98 val_98 2008-04-08 -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select * from (select * from tab where tab.key = 0)a full outer join @@ -2700,7 +2699,6 @@ full outer join PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab_part -PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select * from (select * from tab where tab.key = 0)a @@ -2709,7 +2707,6 @@ full outer join POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab_part -POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: select * from diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index fd35547..63f93fb 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -527,13 +527,13 @@ STAGE DEPENDENCIES: Stage-17 has a backup stage: Stage-2 Stage-13 depends on stages: Stage-17 Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-16 + Stage-12 depends on stages: Stage-15 Stage-0 depends on stages: Stage-12 Stage-7 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-2, Stage-13 + Stage-4 depends on stages: Stage-16 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 Stage-2 STAGE PLANS: @@ -632,6 +632,70 @@ STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: + sq_1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + sq_1:a + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: 
string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: sq_2:s1 Fetch Operator limit: -1 @@ -705,70 +769,6 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-16 - Map Reduce Local Work - Alias -> Map Local Tables: - sq_1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - sq_1:a - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -852,12 +852,12 @@ RUN: Stage-17:MAPREDLOCAL RUN: Stage-13:MAPRED RUN: Stage-15:MAPREDLOCAL RUN: Stage-16:MAPREDLOCAL -RUN: Stage-4:MAPRED RUN: Stage-12:MAPRED -RUN: Stage-1:MOVE +RUN: Stage-4:MAPRED RUN: Stage-0:MOVE -RUN: Stage-5:STATS +RUN: Stage-1:MOVE RUN: Stage-7:STATS +RUN: Stage-5:STATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 diff --git ql/src/test/results/clientpositive/tez/mergejoin.q.out ql/src/test/results/clientpositive/tez/mergejoin.q.out index 1f70c46..03d282c 100644 --- ql/src/test/results/clientpositive/tez/mergejoin.q.out +++ ql/src/test/results/clientpositive/tez/mergejoin.q.out @@ -2677,7 +2677,6 @@ POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### NULL NULL NULL 98 val_98 2008-04-08 NULL NULL NULL 98 val_98 2008-04-08 -Warning: Shuffle Join MERGEJOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from (select * from tab where tab.key = 0)a full outer join @@ -2685,7 +2684,6 @@ full outer join PREHOOK: type: QUERY PREHOOK: Input: default@tab PREHOOK: Input: default@tab_part -PREHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### POSTHOOK: query: select * from (select * from tab where tab.key = 0)a @@ -2694,7 +2692,6 @@ full outer join POSTHOOK: type: QUERY POSTHOOK: Input: default@tab POSTHOOK: Input: default@tab_part -POSTHOOK: Input: default@tab_part@ds=2008-04-08 #### A masked pattern was here #### Warning: Shuffle Join MERGEJOIN[22][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from
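
Reviewer note (not part of the patch): the ExprsItr javadoc above describes how a predicate's columns are substituted through the column-equivalence classes derived from the join condition. The sketch below is a minimal, standalone illustration of that enumeration using only the JDK; the class name SubstitutionSketch and its helpers are hypothetical and do not exist in Hive or Calcite. It reproduces the 'a + b + c' example with equivalences a:{a, b}, b:{a, b}, c:{c, e}. ExprsItr performs the same odometer-style walk over BitSet equivalence classes, but emits Mappings.TargetMapping objects that are then applied to the RexNode via RexPermuteInputsShuttle.

// Minimal sketch of the substitution enumeration described in the ExprsItr
// javadoc. Plain JDK only; names are illustrative and hypothetical.
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;

public class SubstitutionSketch {

  /**
   * Enumerates every mapping of the predicate's columns to members of their
   * equivalence classes. columns[j] is a column index used by the predicate;
   * classes[j] is its equivalence class (assumed non-empty: a column is
   * always equivalent to itself). The result holds one int[] per mapping,
   * where result.get(i)[j] is the column that columns[j] maps to.
   */
  static List<int[]> enumerate(int[] columns, BitSet[] classes) {
    List<int[]> mappings = new ArrayList<>();
    int[] cursor = new int[columns.length];   // next bit to try in each class
    int[] current = new int[columns.length];  // current target column per predicate column
    for (int j = 0; j < columns.length; j++) {
      current[j] = classes[j].nextSetBit(0);
      cursor[j] = current[j] + 1;
    }
    while (true) {
      mappings.add(current.clone());
      // Advance the rightmost column; carry leftwards when a class is exhausted,
      // mirroring what ExprsItr.computeNextMapping does with iterationIdx.
      int level = columns.length - 1;
      while (level >= 0) {
        int next = classes[level].nextSetBit(cursor[level]);
        if (next >= 0) {
          current[level] = next;
          cursor[level] = next + 1;
          break;
        }
        current[level] = classes[level].nextSetBit(0);  // wrap this level
        cursor[level] = current[level] + 1;
        level--;
      }
      if (level < 0) {
        return mappings;  // carried past the first column: enumeration done
      }
    }
  }

  private static BitSet bits(int... xs) {
    BitSet b = new BitSet();
    for (int x : xs) {
      b.set(x);
    }
    return b;
  }

  public static void main(String[] args) {
    // Columns: 0 = a, 1 = b, 2 = c, 3 = e.
    // A join condition a = b gives class {a, b}; c = e gives class {c, e}.
    String[] names = {"a", "b", "c", "e"};
    BitSet ab = bits(0, 1);
    BitSet ce = bits(2, 3);
    int[] predicateColumns = {0, 1, 2};       // columns of 'a + b + c'
    BitSet[] classes = {ab, ab, ce};
    for (int[] m : enumerate(predicateColumns, classes)) {
      System.out.println(names[m[0]] + " + " + names[m[1]] + " + " + names[m[2]]);
    }
    // Prints the eight substitutions listed in the javadoc:
    // a + a + c, a + a + e, a + b + c, a + b + e,
    // b + a + c, b + a + e, b + b + c, b + b + e
  }
}

In the patch itself, each substituted predicate is kept only if all of its input references fall on the side currently being inferred for (the inferringFields check in infer()), which is how a filter on one join input becomes an inferred filter on the other input.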