diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
new file mode 100644
index 0000000..f73affc
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinCommuteRule.java
@@ -0,0 +1,112 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Join;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.rules.JoinCommuteRule;
+import org.apache.calcite.util.Permutation;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+
+/**
+ * Planner rule that permutes the inputs of a Join, if it has a Project on top
+ * that simply swaps the fields of both inputs.
+ *
+ * <p>The rule matches a Project whose input is a Join. It fires only when the
+ * Project is a non-identity permutation and the product of that permutation
+ * with the permutation of the Project returned by
+ * {@code JoinCommuteRule.swap(join, true)} is the identity. In that case the
+ * matched Project is replaced by the input of the Project returned by the swap.
+ */
+public class HiveJoinCommuteRule extends RelOptRule {
+
+  /** Instance that matches a HiveProject whose input is a HiveJoin. */
+  public static final HiveJoinCommuteRule INSTANCE = new HiveJoinCommuteRule(
+      HiveProject.class, HiveJoin.class);
+
+  /**
+   * Creates a rule that matches a Project of class {@code projClazz} whose
+   * input is a Join of class {@code joinClazz}.
+   *
+   * @param projClazz class of the Project operand on top
+   * @param joinClazz class of the Join operand below the Project
+   */
+  public HiveJoinCommuteRule(Class<? extends Project> projClazz,
+      Class<? extends Join> joinClazz) {
+    super(operand(projClazz,
+        operand(joinClazz, any())));
+  }
+
+  /**
+   * Replaces the matched Project by the swapped join when the Project only
+   * undoes the field reordering introduced by swapping the join inputs.
+   * Returns without transforming the plan in every other case.
+   *
+   * @param call rule call; rel 0 is the Project and rel 1 is the Join
+   */
+  @Override
+  public void onMatch(final RelOptRuleCall call) {
+    final Project topProject = call.rel(0);
+    final Join join = call.rel(1);
+
+    // 1. We check if it is a permutation project. If it is
+    //    not, or this is the identity, the rule will do nothing
+    final Permutation topPermutation = topProject.getPermutation();
+    if (topPermutation == null || topPermutation.isIdentity()) {
+      return;
+    }
+
+    // 2. We swap the join
+    final RelNode swapped = JoinCommuteRule.swap(join, true);
+    if (swapped == null) {
+      return;
+    }
+
+    // 3. The result should have a project on top, otherwise we
+    //    bail out. Testing for Project (rather than "not a Join")
+    //    guarantees that the cast in step 4 cannot fail.
+    if (!(swapped instanceof Project)) {
+      return;
+    }
+
+    // 4. We check if it is a permutation project. If it is
+    //    not, or this is the identity, the rule will do nothing
+    final Project bottomProject = (Project) swapped;
+    final Permutation bottomPermutation = bottomProject.getPermutation();
+    if (bottomPermutation == null || bottomPermutation.isIdentity()) {
+      return;
+    }
+
+    // 5. If the product of the topPermutation and bottomPermutation yields
+    //    the identity, then we can swap the join and remove the project on
+    //    top.
+    final Permutation product = topPermutation.product(bottomPermutation);
+    if (!product.isIdentity()) {
+      return;
+    }
+
+    // 6. Return the new join as a replacement
+    final Join swappedJoin = (Join) bottomProject.getInput(0);
+    call.transformTo(swappedJoin);
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 7fd8c85..3b5dbe2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -65,7 +65,6 @@
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
 import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
 import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
-import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule;
 import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
 import org.apache.calcite.rel.rules.LoptOptimizeJoinRule;
 import org.apache.calcite.rel.rules.ProjectMergeRule;
@@ -139,6 +138,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterSetOpTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveInsertExchange4JoinRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinAddNotNullRule;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinCommuteRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule;
@@ -862,7 +862,16 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
 
       calciteOptimizedPlan = hepPlanner.findBestExp();
 
-      // 4. Run rule to fix windowing issue when it is done over
+      // 4. Run rule to try to remove projects on top of join operators
+      hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
+      hepPgmBldr.addRuleInstance(HiveJoinCommuteRule.INSTANCE);
+      hepPlanner = new HepPlanner(hepPgmBldr.build());
+      hepPlanner.registerMetadataProviders(list);
+      cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
+      hepPlanner.setRoot(calciteOptimizedPlan);
+      calciteOptimizedPlan = hepPlanner.findBestExp();
+
+      // 5. Run rule to fix windowing issue when it is done over
       // aggregation columns (HIVE-10627)
       hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
       hepPgmBldr.addRuleInstance(HiveWindowingFixRule.INSTANCE);
@@ -872,9 +881,9 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       hepPlanner.setRoot(calciteOptimizedPlan);
       calciteOptimizedPlan = hepPlanner.findBestExp();
 
-      // 5. Run rules to aid in translation from Calcite tree to Hive tree
+      // 6. Run rules to aid in translation from Calcite tree to Hive tree
       if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) {
-        // 5.1. Merge join into multijoin operators (if possible)
+        // 6.1. Merge join into multijoin operators (if possible)
         hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
         hepPgmBldr.addRuleInstance(HiveJoinToMultiJoinRule.INSTANCE);
         hepPgmBldr = hepPgmBldr.addRuleCollection(ImmutableList.of(
@@ -894,7 +903,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
             HiveAggregate.HIVE_AGGR_REL_FACTORY, HiveUnion.UNION_REL_FACTORY);
         calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan);
 
-        // 5.2. Introduce exchange operators below join/multijoin operators
+        // 6.2. Introduce exchange operators below join/multijoin operators
         hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
         hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN);
         hepPgmBldr.addRuleInstance(HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN);