diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java deleted file mode 100644 index 0e559e0..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-package org.apache.hadoop.hive.ql.optimizer.calcite;
-
-import org.apache.calcite.plan.Context;
-import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
-
-
-public class HiveConfigContext implements Context {
-  private HiveAlgorithmsConf config;
-
-  public HiveConfigContext(HiveAlgorithmsConf config) {
-    this.config = config;
-  }
-
-  public <T> T unwrap(Class<T> clazz) {
-    if (clazz.isInstance(config)) {
-      return clazz.cast(config);
-    }
-    return null;
-  }
-}
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java
new file mode 100644
index 0000000..ad79aee
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.Context;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
+
+/**
+ * Planner {@link Context} that hands a {@link HiveRulesRegistry} to whoever
+ * unwraps it from the planner it was given to.
+ */
+public class HiveHepPlannerContext implements Context {
+  private final HiveRulesRegistry registry;
+
+  /**
+   * Creates a context wrapping the given registry.
+   *
+   * @param registry registry handed back by {@link #unwrap(Class)}
+   */
+  public HiveHepPlannerContext(HiveRulesRegistry registry) {
+    this.registry = registry;
+  }
+
+  /**
+   * Returns the wrapped registry when it is an instance of {@code clazz}.
+   *
+   * @param clazz type requested by the caller
+   * @return the registry cast to {@code clazz}, or {@code null} otherwise
+   */
+  @Override
+  public <T> T unwrap(Class<T> clazz) {
+    if (clazz.isInstance(registry)) {
+      return clazz.cast(registry);
+    }
+    return null;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java
new file mode 100644
index 0000000..8859fc2
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import org.apache.calcite.plan.Context;
+import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
+
+/**
+ * Planner {@link Context} that hands a {@link HiveAlgorithmsConf} to whoever
+ * unwraps it from the planner it was given to.
+ */
+public class HiveVolcanoPlannerContext implements Context {
+  private final HiveAlgorithmsConf config;
+
+  /**
+   * Creates a context wrapping the given algorithms configuration.
+   *
+   * @param config configuration handed back by {@link #unwrap(Class)}
+   */
+  public HiveVolcanoPlannerContext(HiveAlgorithmsConf config) {
+    this.config = config;
+  }
+
+  /**
+   * Returns the wrapped configuration when it is an instance of {@code clazz}.
+   *
+   * @param clazz type requested by the caller
+   * @return the configuration cast to {@code clazz}, or {@code null} otherwise
+   */
+  @Override
+  public <T> T unwrap(Class<T> clazz) {
+    if (clazz.isInstance(config)) {
+      return clazz.cast(config);
+    }
+    return null;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
index a39ded2..8610edc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java
@@ -22,7 +22,7 @@
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.volcano.VolcanoPlanner;
 import org.apache.calcite.rel.RelCollationTraitDef;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext;
 
 /**
  * Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive.
@@ -35,11 +35,11 @@
   private static final boolean ENABLE_COLLATION_TRAIT = true;
 
   /** Creates a HiveVolcanoPlanner.
*/
-  public HiveVolcanoPlanner(HiveConfigContext conf) {
+  public HiveVolcanoPlanner(HiveVolcanoPlannerContext conf) {
     super(HiveCost.FACTORY, conf);
   }
 
-  public static RelOptPlanner createPlanner(HiveConfigContext conf) {
+  public static RelOptPlanner createPlanner(HiveVolcanoPlannerContext conf) {
     final VolcanoPlanner planner = new HiveVolcanoPlanner(conf);
     planner.addRelTraitDef(ConventionTraitDef.INSTANCE);
     if (ENABLE_COLLATION_TRAIT) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index 3e2311c..349c7f8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -76,14 +76,38 @@ private HivePreFilteringRule() {
     this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY;
   }
 
-  public void onMatch(RelOptRuleCall call) {
+  @Override
+  public boolean matches(RelOptRuleCall call) {
     final Filter filter = call.rel(0);
     final RelNode filterChild = call.rel(1);
 
-    // 0. If the filter is already on top of a TableScan,
-    //    we can bail out
+    // If the filter is already on top of a TableScan,
+    // we can bail out
     if (filterChild instanceof TableScan) {
-      return;
+      return false;
+    }
+
+    HiveRulesRegistry registry = call.getPlanner().
+            getContext().unwrap(HiveRulesRegistry.class);
+
+    // If this operator has been visited already by the rule,
+    // we do not need to apply the optimization
+    if (registry != null && registry.getVisited(this).contains(filter)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  @Override
+  public void onMatch(RelOptRuleCall call) {
+    final Filter filter = call.rel(0);
+
+    // 0. Register that we have visited this operator in this rule
+    HiveRulesRegistry registry = call.getPlanner().
+            getContext().unwrap(HiveRulesRegistry.class);
+    if (registry != null) {
+      registry.registerVisited(this, filter);
     }
 
     final RexBuilder rexBuilder = filter.getCluster().getRexBuilder();
@@ -114,7 +138,7 @@ public void onMatch(RelOptRuleCall call) {
     }
 
     // 3. If the new conjuncts are already present in the plan, we bail out
-    final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter);
+    final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput());
     final List<RexNode> newConjuncts = new ArrayList<>();
     for (RexNode commonOperand : commonOperands) {
       boolean found = false;
@@ -137,9 +161,15 @@ public void onMatch(RelOptRuleCall call) {
             RexUtil.composeConjunction(rexBuilder, newConjuncts, false));
 
     // 5. We create the new filter that might be pushed down
-    RelNode newFilter = filterFactory.createFilter(filterChild, newCondition);
+    RelNode newFilter = filterFactory.createFilter(filter.getInput(), newCondition);
     RelNode newTopFilter = filterFactory.createFilter(newFilter, condition);
 
+    // 6. We register both so we do not fire the rule on them again
+    if (registry != null) {
+      registry.registerVisited(this, newFilter);
+      registry.registerVisited(this, newTopFilter);
+    }
+
     call.transformTo(newTopFilter);
 
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java
new file mode 100644
index 0000000..18a065e
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java
@@ -0,0 +1,62 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
+
+import java.util.Set;
+
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.rel.RelNode;
+
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.SetMultimap;
+
+/**
+ * Records, per rule, the operators that the rule has registered as visited,
+ * so that a rule can look them up and decline to match them again.
+ */
+public class HiveRulesRegistry {
+
+  private final SetMultimap<RelOptRule, RelNode> registry;
+
+  /** Creates an empty registry. */
+  public HiveRulesRegistry() {
+    this.registry = HashMultimap.create();
+  }
+
+  /**
+   * Marks {@code operator} as visited by {@code rule}.
+   *
+   * @param rule rule that processed the operator
+   * @param operator operator to remember for that rule
+   */
+  public void registerVisited(RelOptRule rule, RelNode operator) {
+    this.registry.put(rule, operator);
+  }
+
+  /**
+   * Returns the operators registered for {@code rule}.
+   *
+   * @param rule rule to look up
+   * @return the set of operators registered for the rule; empty when none
+   *         have been registered
+   */
+  public Set<RelNode> getVisited(RelOptRule rule) {
+    return this.registry.get(rule);
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 9c731b8..be1d6e7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -63,7 +63,6 @@
 import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
 import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
 import org.apache.calcite.rel.metadata.RelMetadataProvider;
-import org.apache.calcite.rel.rules.AggregateJoinTransposeRule;
 import org.apache.calcite.rel.rules.FilterAggregateTransposeRule;
 import org.apache.calcite.rel.rules.FilterProjectTransposeRule;
 import org.apache.calcite.rel.rules.JoinToMultiJoinRule;
@@ -118,9 +117,10 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
-import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveHepPlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl;
+import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext;
 import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf;
@@ -151,6 +151,7 @@
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
 import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter;
@@ -841,7 +842,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       final Double maxMemory = (double) HiveConf.getLongVar(
               conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD);
       HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory);
-      HiveConfigContext confContext = new HiveConfigContext(algorithmsConf);
+      HiveVolcanoPlannerContext confContext = new HiveVolcanoPlannerContext(algorithmsConf);
       RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext);
       final RelOptQuery query = new RelOptQuery(planner);
       final RexBuilder rexBuilder = cluster.getRexBuilder();
@@ -1061,7 +1062,9 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, RelMetadata
         programBuilder.addRuleInstance(r);
       }
 
-      HepPlanner planner = new HepPlanner(programBuilder.build());
+      HiveRulesRegistry registry = new HiveRulesRegistry();
+      HiveHepPlannerContext context = new HiveHepPlannerContext(registry);
+      HepPlanner planner = new HepPlanner(programBuilder.build(), context);
       List<RelMetadataProvider> list = Lists.newArrayList();
       list.add(mdProvider);
       planner.registerMetadataProviders(list);
diff --git ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
new file mode 100644
index 0000000..f1d8d1d
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/optimizer/calcite/TestCBORuleFiredOnlyOnce.java
@@ -0,0 +1,174 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+
+import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
+import org.apache.calcite.plan.RelOptCluster;
+import org.apache.calcite.plan.RelOptRule;
+import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.plan.hep.HepMatchOrder;
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
+import org.apache.calcite.rel.AbstractRelNode;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.metadata.CachingRelMetadataProvider;
+import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider;
+import org.apache.calcite.rel.metadata.RelMetadataProvider;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rel.type.RelRecordType;
+import org.apache.calcite.rex.RexBuilder;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry;
+import org.junit.Test;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
+/**
+ * Checks that a rule consulting {@link HiveRulesRegistry} fires only once
+ * when run by a {@link HepPlanner} created with a {@link HiveHepPlannerContext}.
+ */
+public class TestCBORuleFiredOnlyOnce {
+
+  @Test
+  public void testRuleFiredOnlyOnce() {
+
+    HiveConf conf = new HiveConf();
+
+    // Create HepPlanner
+    HepProgramBuilder programBuilder = new HepProgramBuilder();
+    programBuilder.addMatchOrder(HepMatchOrder.TOP_DOWN);
+    programBuilder = programBuilder.addRuleCollection(
+            ImmutableList.<RelOptRule>of(DummyRule.INSTANCE));
+
+    // Create rules registry to not trigger a rule more than once
+    HiveRulesRegistry registry = new HiveRulesRegistry();
+    HiveHepPlannerContext context = new HiveHepPlannerContext(registry);
+    HepPlanner planner = new HepPlanner(programBuilder.build(), context);
+
+    // Cluster
+    RexBuilder rexBuilder = new RexBuilder(new JavaTypeFactoryImpl());
+    RelOptCluster cluster = RelOptCluster.create(planner, rexBuilder);
+
+    // Create MD provider
+    HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf);
+    List<RelMetadataProvider> list = Lists.newArrayList();
+    list.add(mdProvider.getMetadataProvider());
+    planner.registerMetadataProviders(list);
+    RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
+
+    final RelNode node = new DummyNode(cluster, cluster.traitSet());
+
+    node.getCluster().setMetadataProvider(
+        new CachingRelMetadataProvider(chainedProvider, planner));
+
+    planner.setRoot(node);
+
+    planner.findBestExp();
+
+    // Matches 3 times: 2 times the original node, 1 time the new node created by the rule
+    assertEquals(3, DummyRule.INSTANCE.numberMatches);
+    // It is fired only once: on the original node
+    assertEquals(1, DummyRule.INSTANCE.numberOnMatch);
+  }
+
+  /** Rule matching any {@link RelNode} that counts its match attempts and firings. */
+  public static class DummyRule extends RelOptRule {
+
+    public static final DummyRule INSTANCE =
+            new DummyRule();
+
+    /** Number of calls to {@link #matches(RelOptRuleCall)}. */
+    public int numberMatches;
+    /** Number of calls to {@link #onMatch(RelOptRuleCall)}. */
+    public int numberOnMatch;
+
+    private DummyRule() {
+      super(operand(RelNode.class, any()));
+      numberMatches = 0;
+      numberOnMatch = 0;
+    }
+
+    @Override
+    public boolean matches(RelOptRuleCall call) {
+      final RelNode node = call.rel(0);
+
+      numberMatches++;
+
+      HiveRulesRegistry registry = call.getPlanner().
+              getContext().unwrap(HiveRulesRegistry.class);
+
+      // If this operator has been visited already by the rule,
+      // we do not need to apply the optimization
+      if (registry != null && registry.getVisited(this).contains(node)) {
+        return false;
+      }
+
+      return true;
+    }
+
+    @Override
+    public void onMatch(RelOptRuleCall call) {
+      final RelNode node = call.rel(0);
+
+      numberOnMatch++;
+
+      // If we have fired it already once, we return and the test will fail
+      if (numberOnMatch > 1) {
+        return;
+      }
+
+      // Register that we have visited this operator in this rule
+      HiveRulesRegistry registry = call.getPlanner().
+              getContext().unwrap(HiveRulesRegistry.class);
+      if (registry != null) {
+        registry.registerVisited(this, node);
+      }
+
+      // We create a new op if it is the first time we fire the rule
+      final RelNode newNode = new DummyNode(node.getCluster(), node.getTraitSet());
+      // We register it so we do not fire the rule on it again
+      if (registry != null) {
+        registry.registerVisited(this, newNode);
+      }
+
+      call.transformTo(newNode);
+    }
+  }
+
+  /** Minimal relational node with an empty row type, used as the plan root. */
+  public static class DummyNode extends AbstractRelNode {
+
+    protected DummyNode(RelOptCluster cluster, RelTraitSet traits) {
+      // Use the trait set supplied by the caller instead of ignoring it
+      super(cluster, traits);
+    }
+
+    @Override
+    protected RelDataType deriveRowType() {
+      return new RelRecordType(Lists.<RelDataTypeField>newArrayList());
+    }
+  }
+
+}