diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java deleted file mode 100644 index 0e559e0..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveConfigContext.java +++ /dev/null @@ -1,37 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.optimizer.calcite; - -import org.apache.calcite.plan.Context; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; - - -public class HiveConfigContext implements Context { - private HiveAlgorithmsConf config; - - public HiveConfigContext(HiveAlgorithmsConf config) { - this.config = config; - } - - public T unwrap(Class clazz) { - if (clazz.isInstance(config)) { - return clazz.cast(config); - } - return null; - } -} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java new file mode 100644 index 0000000..ad79aee --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveHepPlannerContext.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.plan.Context; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; + + +public class HiveHepPlannerContext implements Context { + private HiveRulesRegistry registry; + + public HiveHepPlannerContext(HiveRulesRegistry registry) { + this.registry = registry; + } + + public T unwrap(Class clazz) { + if (clazz.isInstance(registry)) { + return clazz.cast(registry); + } + return null; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java new file mode 100644 index 0000000..8859fc2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveVolcanoPlannerContext.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite; + +import org.apache.calcite.plan.Context; +import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveAlgorithmsConf; + + +public class HiveVolcanoPlannerContext implements Context { + private HiveAlgorithmsConf config; + + public HiveVolcanoPlannerContext(HiveAlgorithmsConf config) { + this.config = config; + } + + public T unwrap(Class clazz) { + if (clazz.isInstance(config)) { + return clazz.cast(config); + } + return null; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java index a39ded2..8610edc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/cost/HiveVolcanoPlanner.java @@ -22,7 +22,7 @@ import org.apache.calcite.plan.RelOptPlanner; import org.apache.calcite.plan.volcano.VolcanoPlanner; import org.apache.calcite.rel.RelCollationTraitDef; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext; /** * Refinement of {@link org.apache.calcite.plan.volcano.VolcanoPlanner} for Hive. @@ -35,11 +35,11 @@ private static final boolean ENABLE_COLLATION_TRAIT = true; /** Creates a HiveVolcanoPlanner. */ - public HiveVolcanoPlanner(HiveConfigContext conf) { + public HiveVolcanoPlanner(HiveVolcanoPlannerContext conf) { super(HiveCost.FACTORY, conf); } - public static RelOptPlanner createPlanner(HiveConfigContext conf) { + public static RelOptPlanner createPlanner(HiveVolcanoPlannerContext conf) { final VolcanoPlanner planner = new HiveVolcanoPlanner(conf); planner.addRelTraitDef(ConventionTraitDef.INSTANCE); if (ENABLE_COLLATION_TRAIT) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java index 3e2311c..1c5a089 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java @@ -18,10 +18,8 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; import java.util.ArrayList; -import java.util.Collection; import java.util.EnumSet; import java.util.List; -import java.util.Map.Entry; import java.util.Set; import org.apache.calcite.plan.RelOptPredicateList; @@ -47,6 +45,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; +import com.google.common.collect.Sets; public class HivePreFilteringRule extends RelOptRule { @@ -76,14 +75,38 @@ private HivePreFilteringRule() { this.filterFactory = HiveFilter.DEFAULT_FILTER_FACTORY; } - public void onMatch(RelOptRuleCall call) { + @Override + public boolean matches(RelOptRuleCall call) { final Filter filter = call.rel(0); final RelNode filterChild = call.rel(1); - // 0. If the filter is already on top of a TableScan, - // we can bail out + // If the filter is already on top of a TableScan, + // we can bail out if (filterChild instanceof TableScan) { - return; + return false; + } + + HiveRulesRegistry registry = call.getPlanner(). + getContext().unwrap(HiveRulesRegistry.class); + + // If this operator has been visited already by the rule, + // we do not need to apply the optimization + if (registry != null && registry.getVisited(this).contains(filter)) { + return false; + } + + return true; + } + + @Override + public void onMatch(RelOptRuleCall call) { + final Filter filter = call.rel(0); + + // 0. Register that we have visited this operator in this rule + HiveRulesRegistry registry = call.getPlanner(). + getContext().unwrap(HiveRulesRegistry.class); + if (registry != null) { + registry.registerVisited(this, filter); } final RexBuilder rexBuilder = filter.getCluster().getRexBuilder(); @@ -97,12 +120,12 @@ public void onMatch(RelOptRuleCall call) { ImmutableList operands = RexUtil.flattenAnd(((RexCall) condition).getOperands()); for (RexNode operand: operands) { if (operand.getKind() == SqlKind.OR) { - commonOperands.addAll(extractCommonOperands(rexBuilder,operand)); + commonOperands.addAll(extractCommonOperands(rexBuilder,filter,operand)); } } break; case OR: - commonOperands = extractCommonOperands(rexBuilder,condition); + commonOperands = extractCommonOperands(rexBuilder,filter,condition); break; default: return; @@ -114,7 +137,7 @@ public void onMatch(RelOptRuleCall call) { } // 3. If the new conjuncts are already present in the plan, we bail out - final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter); + final RelOptPredicateList predicates = RelMetadataQuery.getPulledUpPredicates(filter.getInput()); final List newConjuncts = new ArrayList<>(); for (RexNode commonOperand : commonOperands) { boolean found = false; @@ -137,65 +160,87 @@ public void onMatch(RelOptRuleCall call) { RexUtil.composeConjunction(rexBuilder, newConjuncts, false)); // 5. We create the new filter that might be pushed down - RelNode newFilter = filterFactory.createFilter(filterChild, newCondition); + RelNode newFilter = filterFactory.createFilter(filter.getInput(), newCondition); RelNode newTopFilter = filterFactory.createFilter(newFilter, condition); + // 6. We register both so we do not fire the rule on them again + if (registry != null) { + registry.registerVisited(this, newFilter); + registry.registerVisited(this, newTopFilter); + } + call.transformTo(newTopFilter); } - private static List extractCommonOperands(RexBuilder rexBuilder, RexNode condition) { + private static List extractCommonOperands(RexBuilder rexBuilder, Filter filter, + RexNode condition) { assert condition.getKind() == SqlKind.OR; Multimap reductionCondition = LinkedHashMultimap.create(); + // Data structure to control whether a certain reference is present in every operand + Set refsInAllOperands = null; + // 1. We extract the information necessary to create the predicate for the new // filter; currently we support comparison functions, in and between ImmutableList operands = RexUtil.flattenOr(((RexCall) condition).getOperands()); - for (RexNode operand: operands) { + for (int i = 0; i < operands.size(); i++) { + RexNode operand = operands.get(0); + final RexNode operandCNF = RexUtil.toCnf(rexBuilder, operand); final List conjunctions = RelOptUtil.conjunctions(operandCNF); - boolean addedToReductionCondition = false; // Flag to control whether we have added a new factor - // to the reduction predicate + + // If there is only one conjunct, we can bail out; this avoids returning + // a condition equal to the input condition + if (conjunctions.size() < 2) { + return new ArrayList<>(); + } + + final Set refsInCurrentOperand = Sets.newHashSet(); + for (RexNode conjunction: conjunctions) { if (!(conjunction instanceof RexCall)) { continue; } RexCall conjCall = (RexCall) conjunction; + RexNode ref = null; if(COMPARISON.contains(conjCall.getOperator().getKind())) { if (conjCall.operands.get(0) instanceof RexInputRef && conjCall.operands.get(1) instanceof RexLiteral) { - reductionCondition.put(conjCall.operands.get(0).toString(), - conjCall); - addedToReductionCondition = true; + ref = conjCall.operands.get(0); } else if (conjCall.operands.get(1) instanceof RexInputRef && conjCall.operands.get(0) instanceof RexLiteral) { - reductionCondition.put(conjCall.operands.get(1).toString(), - conjCall); - addedToReductionCondition = true; + ref = conjCall.operands.get(1); } } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) { - reductionCondition.put(conjCall.operands.get(0).toString(), - conjCall); - addedToReductionCondition = true; + ref = conjCall.operands.get(0); } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) { - reductionCondition.put(conjCall.operands.get(1).toString(), - conjCall); - addedToReductionCondition = true; + ref = conjCall.operands.get(1); + } + + if (ref != null) { + String stringRef = ref.toString(); + reductionCondition.put(stringRef, conjCall); + refsInCurrentOperand.add(stringRef); } } - // If we did not add any factor, we can bail out - if (!addedToReductionCondition) { + // Updates the references that are present in every operand up till now + if (i == 0) { + refsInAllOperands = refsInCurrentOperand; + } else { + refsInAllOperands = Sets.intersection(refsInAllOperands, refsInCurrentOperand); + } + // If we did not add any factor or there are no common factors, we can bail out + if (refsInAllOperands.isEmpty()) { return new ArrayList<>(); } } // 2. We gather the common factors and return them List commonOperands = new ArrayList<>(); - for (Entry> pair : reductionCondition.asMap().entrySet()) { - if (pair.getValue().size() == operands.size()) { - commonOperands.add(RexUtil.composeDisjunction(rexBuilder, pair.getValue(), false)); - } + for (String ref : refsInAllOperands) { + commonOperands.add(RexUtil.composeDisjunction(rexBuilder, reductionCondition.get(ref), false)); } return commonOperands; } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java new file mode 100644 index 0000000..18a065e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRulesRegistry.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import java.util.Set; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.rel.RelNode; + +import com.google.common.collect.HashMultimap; +import com.google.common.collect.SetMultimap; + +public class HiveRulesRegistry { + + private SetMultimap registry; + + public HiveRulesRegistry() { + this.registry = HashMultimap.create(); + } + + public void registerVisited(RelOptRule rule, RelNode operator) { + this.registry.put(rule, operator); + } + + public Set getVisited(RelOptRule rule) { + return this.registry.get(rule); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 9c731b8..df7189a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -63,7 +63,6 @@ import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.rules.AggregateJoinTransposeRule; import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; @@ -118,8 +117,9 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException; import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil; -import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfigContext; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveVolcanoPlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveDefaultRelMetadataProvider; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveHepPlannerContext; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveTypeSystemImpl; import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable; import org.apache.hadoop.hive.ql.optimizer.calcite.TraitsUtil; @@ -151,6 +151,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveProjectMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -841,7 +842,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu final Double maxMemory = (double) HiveConf.getLongVar( conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); HiveAlgorithmsConf algorithmsConf = new HiveAlgorithmsConf(maxSplitSize, maxMemory); - HiveConfigContext confContext = new HiveConfigContext(algorithmsConf); + HiveVolcanoPlannerContext confContext = new HiveVolcanoPlannerContext(algorithmsConf); RelOptPlanner planner = HiveVolcanoPlanner.createPlanner(confContext); final RelOptQuery query = new RelOptQuery(planner); final RexBuilder rexBuilder = cluster.getRexBuilder(); @@ -1061,7 +1062,9 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, RelMetadata programBuilder.addRuleInstance(r); } - HepPlanner planner = new HepPlanner(programBuilder.build()); + HiveRulesRegistry registry = new HiveRulesRegistry(); + HiveHepPlannerContext context = new HiveHepPlannerContext(registry); + HepPlanner planner = new HepPlanner(programBuilder.build(), context); List list = Lists.newArrayList(); list.add(mdProvider); planner.registerMetadataProviders(list);