From 720f0492c713c18891af4e61422f576367ea0b88 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Tue, 30 Jun 2015 10:13:21 -0700 Subject: [PATCH] HIVE-11151 : Calcite transitive predicate inference rule should pay attention to its input rel type --- .../HiveJoinPushTransitivePredicatesRule.java | 133 +++++++++++++++++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +- 2 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java new file mode 100644 index 0000000..1e62431 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.util.Util; + +import com.google.common.collect.ImmutableList; + +/** + * Planner rule that infers predicates from a + * {@link org.apache.calcite.rel.core.Join} and creates + * {@link org.apache.calcite.rel.core.Filter}s if those predicates can be pushed + * to its inputs. + * + *

<p>Uses {@link org.apache.calcite.rel.metadata.RelMdPredicates} to infer
+ * the predicates,
+ * returns them in a {@link org.apache.calcite.plan.RelOptPredicateList}
+ * and applies them appropriately.
+ *
+ * <p>An inferred predicate is pushed to an input only when the type of every
+ * input reference it contains equals the type of the field at the same index
+ * in that input's row type; predicates failing this check are not pushed.
+ */
+public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
+  private final RelFactories.FilterFactory filterFactory;
+
+  /** The singleton, built with {@link RelFactories#DEFAULT_FILTER_FACTORY}. */
+  public static final HiveJoinPushTransitivePredicatesRule INSTANCE =
+      new HiveJoinPushTransitivePredicatesRule(Join.class,
+          RelFactories.DEFAULT_FILTER_FACTORY);
+
+  /**
+   * Creates the rule.
+   *
+   * @param clazz join class the rule operand matches on
+   * @param filterFactory factory used to create the filters placed on top of
+   *        the join inputs
+   */
+  public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz,
+      RelFactories.FilterFactory filterFactory) {
+    super(operand(clazz, any()));
+    this.filterFactory = filterFactory;
+  }
+
+  /**
+   * Filters the predicates inferred for each join input down to the
+   * type-compatible ones and, if any remain, replaces the join with a copy
+   * whose inputs are wrapped in the corresponding filters.
+   */
+  @Override public void onMatch(RelOptRuleCall call) {
+    Join join = call.rel(0);
+    RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join);
+
+    RelNode lChild = join.getLeft();
+    RelNode rChild = join.getRight();
+
+    List<RexNode> leftPreds =
+        getValidPreds(preds.leftInferredPredicates, inputFields(lChild));
+    List<RexNode> rightPreds =
+        getValidPreds(preds.rightInferredPredicates, inputFields(rChild));
+
+    // Nothing can be pushed to either side: leave the plan untouched.
+    if (leftPreds.isEmpty() && rightPreds.isEmpty()) {
+      return;
+    }
+
+    RexBuilder rB = join.getCluster().getRexBuilder();
+    RelNode newLeft = pushFilter(call, rB, lChild, leftPreds);
+    RelNode newRight = pushFilter(call, rB, rChild, rightPreds);
+
+    RelNode newRel = join.copy(join.getTraitSet(), join.getCondition(),
+        newLeft, newRight, join.getJoinType(), join.isSemiJoinDone());
+    call.getPlanner().onCopy(join, newRel);
+
+    call.transformTo(newRel);
+  }
+
+  /**
+   * Returns the fields of the row type of a join input. When the input is a
+   * {@link HepRelVertex} the fields are read from the relational expression
+   * the vertex currently holds; any other node is asked directly, so the rule
+   * does not fail with a {@link ClassCastException} for non-vertex inputs.
+   */
+  private static List<RelDataTypeField> inputFields(RelNode input) {
+    RelNode rel = input instanceof HepRelVertex
+        ? ((HepRelVertex) input).getCurrentRel()
+        : input;
+    return rel.getRowType().getFieldList();
+  }
+
+  /**
+   * Wraps {@code input} in a filter on the conjunction of {@code preds} and
+   * notifies the planner of the copy; returns {@code input} unchanged when
+   * there is nothing to push.
+   */
+  private RelNode pushFilter(RelOptRuleCall call, RexBuilder rB,
+      RelNode input, List<RexNode> preds) {
+    if (preds.isEmpty()) {
+      return input;
+    }
+    RelNode filter = filterFactory.createFilter(input,
+        RexUtil.composeConjunction(rB, preds, false));
+    call.getPlanner().onCopy(input, filter);
+    return filter;
+  }
+
+  /**
+   * Returns, in their original order, the predicates whose input references
+   * all pass {@link InputRefValidator} against {@code types}.
+   *
+   * @param rexs candidate predicates
+   * @param types fields of the input the predicates would be pushed to
+   */
+  private static ImmutableList<RexNode> getValidPreds(List<RexNode> rexs,
+      List<RelDataTypeField> types) {
+    InputRefValidator validator = new InputRefValidator(types);
+    ImmutableList.Builder<RexNode> valids = ImmutableList.builder();
+    for (RexNode rex : rexs) {
+      try {
+        rex.accept(validator);
+        valids.add(rex);
+      } catch (Util.FoundOne e) {
+        // Raised by the validator to reject this predicate; it is simply
+        // skipped, the remaining predicates are still examined.
+        Util.swallow(e, null);
+      }
+    }
+    return valids.build();
+  }
+
+  /**
+   * Visitor that throws {@link Util.FoundOne} on the first input reference
+   * whose index is outside the given field list or whose type differs from
+   * the type of the field at that index.
+   */
+  private static class InputRefValidator extends RexVisitorImpl<Void> {
+
+    private final List<RelDataTypeField> types;
+
+    protected InputRefValidator(List<RelDataTypeField> types) {
+      super(true);
+      this.types = types;
+    }
+
+    @Override
+    public Void visitInputRef(RexInputRef inputRef) {
+      int index = inputRef.getIndex();
+      // A reference past the end of the input cannot be pushed either; treat
+      // it like a type mismatch instead of failing on the list lookup.
+      if (index >= types.size()
+          || !inputRef.getType().equals(types.get(index).getType())) {
+        throw new Util.FoundOne(inputRef);
+      }
+      return null;
+    }
+  }
+}
+
+// End HiveJoinPushTransitivePredicatesRule.java
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index e0a778c..6c6c62a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -141,6 +141,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; @@ -266,7 +267,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept // here with SemiJoins not all tables are costed in CBO, so their // PartitionList is not evaluated until the run phase. 
getMetaData(getQB()); - + disableJoinMerge = false; sinkOp = genPlan(getQB()); LOG.info("CBO Succeeded; optimized logical plan."); @@ -970,7 +971,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // 5. Transitive inference & Partition Pruning basePlan = hepPlan(basePlan, false, mdProvider, - new JoinPushTransitivePredicatesRule(Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), + new HiveJoinPushTransitivePredicatesRule(Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); // 6. Projection Pruning -- 1.7.12.4 (Apple Git-37)