diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java index 4825a61..d5025ab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java @@ -26,7 +26,6 @@ import java.util.Set; import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelOptUtil.InputFinder; import org.apache.calcite.plan.RelOptUtil.InputReferencedVisitor; @@ -34,7 +33,6 @@ import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories.ProjectFactory; import org.apache.calcite.rel.core.Sort; -import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; @@ -628,10 +626,10 @@ public String apply(RexNode r) { } }; - public static ImmutableList getPredsNotPushedAlready(RelNode inp, List predsToPushDown) { - final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(inp); - final ImmutableSet alreadyPushedPreds = ImmutableSet.copyOf(Lists.transform( - predicates.pulledUpPredicates, REX_STR_FN)); + public static ImmutableList getPredsNotPushedAlready(List alreadyPushed, + List predsToPushDown) { + final ImmutableSet alreadyPushedPreds = ImmutableSet.copyOf( + Lists.transform(alreadyPushed, REX_STR_FN)); final ImmutableList.Builder newConjuncts = ImmutableList.builder(); for (RexNode r : predsToPushDown) { if (!alreadyPushedPreds.contains(r.toString())) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java index d15d885..4af1f8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveCostModel.java @@ -56,8 +56,8 @@ public RelOptCost getJoinCost(HiveJoin join) { JoinAlgorithm joinAlgorithm = null; RelOptCost minJoinCost = null; - if (LOG.isDebugEnabled()) { - LOG.debug("Join algorithm selection for:\n" + RelOptUtil.toString(join)); + if (LOG.isTraceEnabled()) { + LOG.trace("Join algorithm selection for:\n" + RelOptUtil.toString(join)); } for (JoinAlgorithm possibleAlgorithm : this.joinAlgorithms) { @@ -65,8 +65,8 @@ public RelOptCost getJoinCost(HiveJoin join) { continue; } RelOptCost joinCost = possibleAlgorithm.getCost(join); - if (LOG.isDebugEnabled()) { - LOG.debug(possibleAlgorithm + " cost: " + joinCost); + if (LOG.isTraceEnabled()) { + LOG.trace(possibleAlgorithm + " cost: " + joinCost); } if (minJoinCost == null || joinCost.isLt(minJoinCost) ) { joinAlgorithm = possibleAlgorithm; @@ -74,8 +74,8 @@ public RelOptCost getJoinCost(HiveJoin join) { } } - if (LOG.isDebugEnabled()) { - LOG.debug(joinAlgorithm + " selected"); + if (LOG.isTraceEnabled()) { + LOG.trace(joinAlgorithm + " selected"); } join.setJoinAlgorithm(joinAlgorithm); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java index 27b1e76..5399df0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveJoin.java @@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel.DefaultJoinAlgorithm; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; //TODO: Should we convert MultiJoin to be a child of HiveJoin public class HiveJoin extends Join implements HiveRelNode { @@ -62,14 +63,18 @@ private final JoinPredicateInfo joinPredInfo; private JoinAlgorithm joinAlgorithm; private RelOptCost joinCost; + private List predicatesPushedToLeft; + private List predicatesPushedToRight; public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, boolean leftSemiJoin) { try { Set variablesStopped = Collections.emptySet(); + List predicatesPushedToLeft = Lists.newArrayList(); + List predicatesPushedToRight = Lists.newArrayList(); HiveJoin join = new HiveJoin(cluster, null, left, right, condition, joinType, variablesStopped, - DefaultJoinAlgorithm.INSTANCE, leftSemiJoin); + predicatesPushedToLeft, predicatesPushedToRight, DefaultJoinAlgorithm.INSTANCE, leftSemiJoin); return join; } catch (InvalidRelException | CalciteSemanticException e) { throw new RuntimeException(e); @@ -78,6 +83,7 @@ public static HiveJoin getJoin(RelOptCluster cluster, RelNode left, RelNode righ protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelNode right, RexNode condition, JoinRelType joinType, Set variablesStopped, + List predicatesPushedToLeft, List predicatesPushedToRight, JoinAlgorithm joinAlgo, boolean leftSemiJoin) throws InvalidRelException, CalciteSemanticException { super(cluster, TraitsUtil.getDefaultTraitSet(cluster), left, right, condition, joinType, variablesStopped); @@ -90,6 +96,8 @@ protected HiveJoin(RelOptCluster cluster, RelTraitSet traits, RelNode left, RelN this.joinFilter = HiveRelOptUtil.splitHiveJoinCondition(systemFieldList, this.getInputs(), this.getCondition(), joinKeyExprs, filterNulls, null); this.joinPredInfo = HiveCalciteUtil.JoinPredicateInfo.constructJoinPredicateInfo(this); + this.predicatesPushedToLeft = predicatesPushedToLeft; + this.predicatesPushedToRight = predicatesPushedToRight; this.joinAlgorithm = joinAlgo; this.leftSemiJoin = leftSemiJoin; } @@ -104,7 +112,8 @@ public final HiveJoin copy(RelTraitSet traitSet, RexNode conditionExpr, RelNode try { Set variablesStopped = Collections.emptySet(); return new HiveJoin(getCluster(), traitSet, left, right, conditionExpr, joinType, - variablesStopped, joinAlgorithm, leftSemiJoin); + variablesStopped, predicatesPushedToLeft, predicatesPushedToRight, + joinAlgorithm, leftSemiJoin); } catch (InvalidRelException | CalciteSemanticException e) { // Semantic error not possible. Must be a bug. Convert to // internal error. @@ -128,6 +137,14 @@ public JoinAlgorithm getJoinAlgorithm() { return this.joinAlgorithm; } + public List getPredicatesPushedToLeft() { + return this.predicatesPushedToLeft; + } + + public List getPredicatesPushedToRight() { + return this.predicatesPushedToRight; + } + public ImmutableList getCollation() { return joinAlgorithm.getCollation(this); } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java index 8b57b35..7cfb007 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveUnion.java @@ -24,9 +24,8 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.SetOp; import org.apache.calcite.rel.core.Union; -import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode.Implementor; -public class HiveUnion extends Union { +public class HiveUnion extends Union implements HiveRelNode { public HiveUnion(RelOptCluster cluster, RelTraitSet traits, List inputs) { super(cluster, traits, inputs, true); @@ -37,6 +36,7 @@ public SetOp copy(RelTraitSet traitSet, List inputs, boolean all) { return new HiveUnion(this.getCluster(), traitSet, inputs); } + @Override public void implement(Implementor implementor) { } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java index de880ce..22c3ff1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinAddNotNullRule.java @@ -46,8 +46,6 @@ public final class HiveJoinAddNotNullRule extends RelOptRule { - private static final String NOT_NULL_FUNC_NAME = "isnotnull"; - /** The singleton. */ public static final HiveJoinAddNotNullRule INSTANCE = new HiveJoinAddNotNullRule(HiveRelFactories.HIVE_FILTER_FACTORY); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java index 703c8c6..f0d37e3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -23,6 +23,7 @@ import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.RelFactories; @@ -38,10 +39,13 @@ import org.apache.calcite.util.Util; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull; import org.apache.hive.common.util.AnnotationUtils; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; /** * Planner rule that infers predicates from on a @@ -59,47 +63,43 @@ /** The singleton. */ public static final HiveJoinPushTransitivePredicatesRule INSTANCE = - new HiveJoinPushTransitivePredicatesRule(Join.class, - RelFactories.DEFAULT_FILTER_FACTORY); + new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, + HiveRelFactories.HIVE_FILTER_FACTORY); public HiveJoinPushTransitivePredicatesRule(Class clazz, RelFactories.FilterFactory filterFactory) { - super(operand(clazz, operand(RelNode.class, any()), - operand(RelNode.class, any()))); + super(operand(clazz, any())); this.filterFactory = filterFactory; } - @Override public void onMatch(RelOptRuleCall call) { - Join join = call.rel(0); - - // Register that we have visited this operator in this rule - HiveRulesRegistry registry = call.getPlanner().getContext().unwrap(HiveRulesRegistry.class); - if (registry != null) { - registry.registerVisited(this, join); - } + @Override + public void onMatch(RelOptRuleCall call) { + HiveJoin join = call.rel(0); RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join); RexBuilder rB = join.getCluster().getRexBuilder(); - RelNode lChild = call.rel(1); - RelNode rChild = call.rel(2); + RelNode lChild = join.getLeft(); + RelNode rChild = join.getRight(); - List leftPreds = getValidPreds(join.getCluster(), lChild, preds.leftInferredPredicates, lChild.getRowType()); - List rightPreds = getValidPreds(join.getCluster(), rChild, preds.rightInferredPredicates, rChild.getRowType()); + List leftPreds = getValidPreds(join.getCluster(), join, lChild, preds.leftInferredPredicates, lChild.getRowType()); + List rightPreds = getValidPreds(join.getCluster(), join, rChild, preds.rightInferredPredicates, rChild.getRowType()); - if (leftPreds.isEmpty() && rightPreds.isEmpty()) { + RexNode newLeftPredicate = RexUtil.composeConjunction(rB, leftPreds, false); + RexNode newRightPredicate = RexUtil.composeConjunction(rB, rightPreds, false); + if (newLeftPredicate.isAlwaysTrue() && newRightPredicate.isAlwaysTrue()) { return; } - if (leftPreds.size() > 0) { + if (!newLeftPredicate.isAlwaysTrue()) { RelNode curr = lChild; - lChild = filterFactory.createFilter(lChild, RexUtil.composeConjunction(rB, leftPreds, false)); + lChild = filterFactory.createFilter(lChild, newLeftPredicate); call.getPlanner().onCopy(curr, lChild); } - if (rightPreds.size() > 0) { + if (!newRightPredicate.isAlwaysTrue()) { RelNode curr = rChild; - rChild = filterFactory.createFilter(rChild, RexUtil.composeConjunction(rB, rightPreds, false)); + rChild = filterFactory.createFilter(rChild, newRightPredicate); call.getPlanner().onCopy(curr, rChild); } @@ -107,15 +107,10 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, lChild, rChild, join.getJoinType(), join.isSemiJoinDone()); call.getPlanner().onCopy(join, newRel); - // We register new Join rel so we do not fire the rule on them again - if (registry != null) { - registry.registerVisited(this, newRel); - } - call.transformTo(newRel); } - private ImmutableList getValidPreds(RelOptCluster cluster, RelNode rn, + private ImmutableList getValidPreds(RelOptCluster cluster, HiveJoin join, RelNode child, List rexs, RelDataType rType) { InputRefValidator validator = new InputRefValidator(rType.getFieldList()); List valids = new ArrayList(rexs.size()); @@ -128,7 +123,24 @@ public HiveJoinPushTransitivePredicatesRule(Class clazz, } } - return HiveCalciteUtil.getPredsNotPushedAlready(rn, valids); + // We need to filter i) those that have been pushed already as stored in the join, + // and ii) those that were already in the subtree rooted at child + List predicatesToExclude = Lists.newArrayList(); + List predicatesPushed; + if (join.getLeft() == child) { + predicatesPushed = join.getPredicatesPushedToLeft(); + } else { + predicatesPushed = join.getPredicatesPushedToRight(); + } + predicatesToExclude.addAll(predicatesPushed); + final RelOptPredicateList predicatesInSubtree = RelMetadataQuery.getPulledUpPredicates(child); + for (RexNode pred : predicatesInSubtree.pulledUpPredicates) { + predicatesToExclude.add(pred); + predicatesToExclude.addAll(RelOptUtil.conjunctions(pred)); + } + ImmutableList toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude, valids); + predicatesPushed.addAll(toPush); + return toPush; } private RexNode getTypeSafePred(RelOptCluster cluster, RexNode rex, RelDataType rType) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index d37fc0e..ef22778 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -29,6 +29,7 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Filter; import org.apache.calcite.rel.core.RelFactories.FilterFactory; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; @@ -167,15 +168,16 @@ public void onMatch(RelOptRuleCall call) { } // 3. If the new conjuncts are already present in the plan, we bail out - final List newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready(filter.getInput(), + final List newConjuncts = HiveCalciteUtil.getPredsNotPushedAlready( + RelMetadataQuery.getPulledUpPredicates(filter.getInput()).pulledUpPredicates, operandsToPushDown); - if (newConjuncts.isEmpty()) { + RexNode newPredicate = RexUtil.composeConjunction(rexBuilder, newConjuncts, false); + if (newPredicate.isAlwaysTrue()) { return; } // 4. Otherwise, we create a new condition - final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, - RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); + final RexNode newChildFilterCondition = RexUtil.pullFactors(rexBuilder, newPredicate); // 5. We create the new filter that might be pushed down RelNode newChildFilter = filterFactory.createFilter(filter.getInput(), newChildFilterCondition); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f50f4d3..717d0b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -59,7 +59,6 @@ import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.AggregateCall; import org.apache.calcite.rel.core.Filter; -import org.apache.calcite.rel.core.Join; import org.apache.calcite.rel.core.JoinRelType; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; @@ -77,14 +76,12 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeField; import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.rex.RexExecutorImpl; import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexNode; import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindowBound; import org.apache.calcite.schema.SchemaPlus; -import org.apache.calcite.schema.Schemas; import org.apache.calcite.sql.SqlAggFunction; import org.apache.calcite.sql.SqlCall; import org.apache.calcite.sql.SqlExplainLevel; @@ -1077,31 +1074,42 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // 3. PPD for old Join Syntax // NOTE: PPD needs to run before adding not null filters in order to // support old style join syntax (so that on-clauses will get filled up). - // TODO: Add in ReduceExpressionrules (Constant folding) to below once - // HIVE-11927 is fixed. perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, - HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, - HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( - HiveRelFactories.HIVE_FILTER_FACTORY)); + basePlan = hepPlan(basePlan, true, mdProvider, null, + HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, + HiveFilterSetOpTransposeRule.INSTANCE, + HiveFilterSortTransposeRule.INSTANCE, + HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, + new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), + new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, PPD for old join syntax"); + // 4. Add not null filters + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = hepPlan(basePlan, true, mdProvider, null, HiveJoinAddNotNullRule.INSTANCE); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Prejoin ordering transformation, Add not null filters"); - // TODO: Transitive inference, constant prop & Predicate push down has to - // do multiple passes till no more inference is left - // Currently doing so would result in a spin. Just checking for if inferred - // pred is present below may not be sufficient as inferred & pushed pred - // could have been mutated by constant folding/prop - // 4. Transitive inference for join on clauses + // 5. Run exhaustive PPD, transitive inference, constant propagation, constant folding perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, new HiveJoinPushTransitivePredicatesRule( - Join.class, HiveRelFactories.HIVE_FILTER_FACTORY)); + basePlan = hepPlan(basePlan, true, mdProvider, null, + HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, + HiveFilterSetOpTransposeRule.INSTANCE, + HiveFilterSortTransposeRule.INSTANCE, + HiveFilterJoinRule.JOIN, + HiveFilterJoinRule.FILTER_ON_JOIN, + new HiveFilterAggregateTransposeRule(Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), + new FilterMergeRule(HiveRelFactories.HIVE_FILTER_FACTORY), + HiveJoinPushTransitivePredicatesRule.INSTANCE, + HiveReduceExpressionsRule.PROJECT_INSTANCE, + HiveReduceExpressionsRule.FILTER_INSTANCE, + HiveReduceExpressionsRule.JOIN_INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Transitive inference for join on clauses"); + "Calcite: Prejoin ordering transformation, PPD, transitive inference, constant folding"); - // 5. Push down limit through outer join + // 6. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 @@ -1123,46 +1131,20 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv "Calcite: Prejoin ordering transformation, Push down limit through outer join"); } - // 6. Add not null filters - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveJoinAddNotNullRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Add not null filters"); - - // 7. Rerun Constant propagation and PPD now that we have added Not NULL filters & did transitive inference - // TODO: Add in ReduceExpressionrules (Constant folding) to below once - // HIVE-11927 is fixed. - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HiveFilterProjectTransposeRule.INSTANCE_DETERMINISTIC, - HiveFilterSetOpTransposeRule.INSTANCE, HiveFilterSortTransposeRule.INSTANCE, HiveFilterJoinRule.JOIN, - HiveFilterJoinRule.FILTER_ON_JOIN, new HiveFilterAggregateTransposeRule(Filter.class, - HiveRelFactories.HIVE_FILTER_FACTORY, Aggregate.class), new FilterMergeRule( - HiveRelFactories.HIVE_FILTER_FACTORY)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Constant propagation and PPD"); - - // 8. Push Down Semi Joins + // 7. Push Down Semi Joins perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); - // 9. Constant folding - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, - HiveReduceExpressionsRule.PROJECT_INSTANCE, HiveReduceExpressionsRule.FILTER_INSTANCE, - HiveReduceExpressionsRule.JOIN_INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Constant folding"); - - // 10. Apply Partition Pruning + // 8. Apply Partition Pruning perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Partition Pruning"); - // 11. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) + // 9. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, HiveRelFactories.HIVE_BUILDER.create(cluster, null)); @@ -1170,14 +1152,14 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Projection Pruning"); - // 12. Merge Project-Project if possible + // 10. Merge Project-Project if possible perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, null, new ProjectMergeRule(true, HiveRelFactories.HIVE_PROJECT_FACTORY)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation, Merge Project-Project"); - // 13. Rerun PPD through Project as column pruning would have introduced + // 11. Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into // storage (incase there are filters on non partition cols). This only // matches FIL-PROJ-TS