From 8ce91c156541e8f1198fe30f807b91428e31e9f8 Mon Sep 17 00:00:00 2001 From: Ashutosh Chauhan Date: Tue, 30 Jun 2015 15:20:13 -0700 Subject: [PATCH] HIVE-11151 : Calcite transitive predicate inference rule should pay attention to its input rel type --- .../HiveJoinPushTransitivePredicatesRule.java | 135 +++++++++++++++++++++ .../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +- 2 files changed, 138 insertions(+), 2 deletions(-) create mode 100644 ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java new file mode 100644 index 0000000..4050c09 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.calcite.plan.RelOptPredicateList; +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.Join; +import org.apache.calcite.rel.core.RelFactories; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.util.Util; + +import com.google.common.collect.ImmutableList; + +/** + * Planner rule that infers predicates from a + * {@link org.apache.calcite.rel.core.Join} and creates + * {@link org.apache.calcite.rel.core.Filter}s if those predicates can be pushed + * to its inputs. + * + *

Uses {@link org.apache.calcite.rel.metadata.RelMdPredicates} to infer
+ * the predicates,
+ * returns them in a {@link org.apache.calcite.plan.RelOptPredicateList}
+ * and applies them appropriately.
+ *
+ * Inferred predicates of the form {@code isnotnull($i)} are dropped when
+ * field {@code i} of the corresponding join input is already non-nullable,
+ * so no redundant filter is created for them.
+ */
+public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
+  /** Factory used to build the filters that are placed on the join inputs. */
+  private final RelFactories.FilterFactory filterFactory;
+
+  /** The singleton. */
+  public static final HiveJoinPushTransitivePredicatesRule INSTANCE =
+      new HiveJoinPushTransitivePredicatesRule(Join.class,
+          RelFactories.DEFAULT_FILTER_FACTORY);
+
+  /**
+   * Creates the rule.
+   *
+   * @param clazz         join class matched by the rule's operand
+   * @param filterFactory factory used to create the filters on the join inputs
+   */
+  public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz,
+      RelFactories.FilterFactory filterFactory) {
+    super(operand(clazz, any()));
+    this.filterFactory = filterFactory;
+  }
+
+  /**
+   * Wraps each join input in a filter built from the predicates inferred for
+   * that side, then replaces the join with a copy over the new inputs. Does
+   * nothing when no usable predicate was inferred for either side.
+   */
+  @Override public void onMatch(RelOptRuleCall call) {
+    Join join = call.rel(0);
+    RelOptPredicateList preds = RelMetadataQuery.getPulledUpPredicates(join);
+
+    RexBuilder rB = join.getCluster().getRexBuilder();
+    RelNode lChild = join.getLeft();
+    RelNode rChild = join.getRight();
+
+    List<RexNode> leftPreds =
+        getValidPreds(preds.leftInferredPredicates, getInputFields(lChild));
+    List<RexNode> rightPreds =
+        getValidPreds(preds.rightInferredPredicates, getInputFields(rChild));
+
+    if (leftPreds.isEmpty() && rightPreds.isEmpty()) {
+      return;
+    }
+
+    if (!leftPreds.isEmpty()) {
+      RelNode curr = lChild;
+      lChild = filterFactory.createFilter(lChild, RexUtil.composeConjunction(rB, leftPreds, false));
+      call.getPlanner().onCopy(curr, lChild);
+    }
+
+    if (!rightPreds.isEmpty()) {
+      RelNode curr = rChild;
+      rChild = filterFactory.createFilter(rChild, RexUtil.composeConjunction(rB, rightPreds, false));
+      call.getPlanner().onCopy(curr, rChild);
+    }
+
+    RelNode newRel = join.copy(join.getTraitSet(), join.getCondition(),
+        lChild, rChild, join.getJoinType(), join.isSemiJoinDone());
+    call.getPlanner().onCopy(join, newRel);
+
+    call.transformTo(newRel);
+  }
+
+  /**
+   * Returns the row-type fields of a join input. When the input is a
+   * {@link HepRelVertex} the fields of the rel it currently holds are used;
+   * any other node is queried directly instead of being cast blindly.
+   */
+  private static List<RelDataTypeField> getInputFields(RelNode input) {
+    RelNode rel = input instanceof HepRelVertex
+        ? ((HepRelVertex) input).getCurrentRel()
+        : input;
+    return rel.getRowType().getFieldList();
+  }
+
+  /**
+   * Filters out the inferred predicates that {@link InputRefValidator}
+   * rejects.
+   *
+   * @param rexs  predicates inferred for one join input
+   * @param types fields of that input's row type, indexed by input reference
+   * @return the predicates that passed validation, in their original order
+   */
+  private ImmutableList<RexNode> getValidPreds(List<RexNode> rexs,
+      List<RelDataTypeField> types) {
+    InputRefValidator validator = new InputRefValidator(types);
+    List<RexNode> valids = new ArrayList<RexNode>(rexs.size());
+    for (RexNode rex : rexs) {
+      try {
+        rex.accept(validator);
+        valids.add(rex);
+      } catch (Util.FoundOne e) {
+        // The validator signals a rejected predicate by throwing; skip it.
+        Util.swallow(e, null);
+      }
+    }
+    return ImmutableList.copyOf(valids);
+  }
+
+  /**
+   * Visitor that throws {@link Util.FoundOne} when it meets an
+   * {@code isnotnull} call whose first operand is an input reference to a
+   * field that is not nullable.
+   */
+  private static class InputRefValidator extends RexVisitorImpl<Void> {
+
+    private final List<RelDataTypeField> types;
+
+    protected InputRefValidator(List<RelDataTypeField> types) {
+      super(true);
+      this.types = types;
+    }
+
+    @Override
+    public Void visitCall(RexCall call) {
+      if ("isnotnull".equals(call.getOperator().getName())) {
+        RexNode operand = call.getOperands().get(0);
+        if (operand instanceof RexInputRef
+            && !types.get(((RexInputRef) operand).getIndex()).getType().isNullable()) {
+          // The referenced field is declared non-nullable, so a not-null
+          // filter on it would be redundant.
+          throw new Util.FoundOne(call);
+        }
+      }
+      return super.visitCall(call);
+    }
+  }
+}
+
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index a73e24e..7fd8c85 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -140,6 +140,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
@@ -265,7 +266,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept
 // here with SemiJoins not all tables are costed in CBO, so their
 // PartitionList is not 
evaluated until the run phase. getMetaData(getQB()); - + disableJoinMerge = false; sinkOp = genPlan(getQB()); LOG.info("CBO Succeeded; optimized logical plan."); @@ -965,7 +966,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveFilter.DEFAULT_FILTER_FACTORY, Aggregate.class)); // 4. Transitive inference & Partition Pruning - basePlan = hepPlan(basePlan, false, mdProvider, new JoinPushTransitivePredicatesRule( + basePlan = hepPlan(basePlan, false, mdProvider, new HiveJoinPushTransitivePredicatesRule( Join.class, HiveFilter.DEFAULT_FILTER_FACTORY), new HivePartitionPruneRule(conf)); -- 1.7.12.4 (Apple Git-37)