diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 8e6b01bfb4..8eb5c0181e 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -74,6 +74,7 @@ import org.apache.hadoop.hive.ql.exec.mr.HadoopJobExecHelper; import org.apache.hadoop.hive.ql.exec.tez.TezJobExecHelper; import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; @@ -773,6 +774,8 @@ public int run(String[] args) throws Exception { ss.updateThreadName(); + // Initialize metadata provider class + CalcitePlanner.initializeMetadataProviderClass(); // Create views registry HiveMaterializedViewsRegistry.get().init(); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java index 41c2f9e6a6..0a2714255e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveDefaultRelMetadataProvider.java @@ -17,14 +17,43 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite; +import java.util.List; +import org.apache.calcite.adapter.druid.DruidQuery; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcAggregate; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcFilter; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcJoin; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcProject; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcSort; +import org.apache.calcite.adapter.jdbc.JdbcRules.JdbcUnion; +import org.apache.calcite.plan.hep.HepRelVertex; +import 
org.apache.calcite.plan.volcano.AbstractConverter; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.convert.ConverterImpl; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; +import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveDefaultCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveOnTezCostModel; import org.apache.hadoop.hive.ql.optimizer.calcite.cost.HiveRelMdCost; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExcept; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIntersect; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveRelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortExchange; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSortLimit; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnion; +import 
org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.HiveJdbcConverter; +import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.jdbc.JdbcHiveTableScan; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdColumnUniqueness; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdCollation; import org.apache.hadoop.hive.ql.optimizer.calcite.stats.HiveRelMdDistinctRowCount; @@ -41,45 +70,121 @@ public class HiveDefaultRelMetadataProvider { - private final HiveConf hiveConf; + /** + * The default metadata provider can be instantiated statically since + * it does not need any parameter specified by user (hive conf). + */ + private static final JaninoRelMetadataProvider DEFAULT = + JaninoRelMetadataProvider.of( + ChainedRelMetadataProvider.of( + ImmutableList.of( + HiveRelMdDistinctRowCount.SOURCE, + new HiveRelMdCost(HiveDefaultCostModel.getCostModel()).getMetadataProvider(), + HiveRelMdSelectivity.SOURCE, + HiveRelMdRowCount.SOURCE, + HiveRelMdUniqueKeys.SOURCE, + HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdSize.SOURCE, + HiveRelMdMemory.SOURCE, + HiveRelMdDistribution.SOURCE, + HiveRelMdCollation.SOURCE, + HiveRelMdPredicates.SOURCE, + JaninoRelMetadataProvider.DEFAULT))); + + /** + * This is the list of operators that are specifically used in Hive and + * should be loaded by the metadata providers. 
+ */ + private static final List<Class<? extends RelNode>> HIVE_REL_NODE_CLASSES = + ImmutableList.of( + RelNode.class, + AbstractRelNode.class, + RelSubset.class, + HepRelVertex.class, + ConverterImpl.class, + AbstractConverter.class, + + HiveTableScan.class, + HiveAggregate.class, + HiveExcept.class, + HiveFilter.class, + HiveIntersect.class, + HiveJoin.class, + HiveMultiJoin.class, + HiveProject.class, + HiveRelNode.class, + HiveSemiJoin.class, + HiveSortExchange.class, + HiveSortLimit.class, + HiveTableFunctionScan.class, + HiveUnion.class, + + DruidQuery.class, + + HiveJdbcConverter.class, + JdbcHiveTableScan.class, + JdbcAggregate.class, + JdbcFilter.class, + JdbcJoin.class, + JdbcProject.class, + JdbcSort.class, + JdbcUnion.class); + + private final RelMetadataProvider metadataProvider; public HiveDefaultRelMetadataProvider(HiveConf hiveConf) { - this.hiveConf = hiveConf; + this.metadataProvider = init(hiveConf); } - public RelMetadataProvider getMetadataProvider() { - + private RelMetadataProvider init(HiveConf hiveConf) { // Create cost metadata provider - final HiveCostModel cm; - if (HiveConf.getVar(this.hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") - && HiveConf.getBoolVar(this.hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) { - cm = HiveOnTezCostModel.getCostModel(hiveConf); - } else { - cm = HiveDefaultCostModel.getCostModel(); + if (HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez") + && HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVE_CBO_EXTENDED_COST_MODEL)) { + // Get max split size for HiveRelMdParallelism + final Double maxSplitSize = (double) HiveConf.getLongVar( + hiveConf, HiveConf.ConfVars.MAPREDMAXSPLITSIZE); + + // Create and return metadata provider + JaninoRelMetadataProvider metadataProvider = JaninoRelMetadataProvider.of( + ChainedRelMetadataProvider.of( + ImmutableList.of( + HiveRelMdDistinctRowCount.SOURCE, + new HiveRelMdCost(HiveOnTezCostModel.getCostModel(hiveConf)).getMetadataProvider(), + 
HiveRelMdSelectivity.SOURCE, + HiveRelMdRowCount.SOURCE, + HiveRelMdUniqueKeys.SOURCE, + HiveRelMdColumnUniqueness.SOURCE, + HiveRelMdSize.SOURCE, + HiveRelMdMemory.SOURCE, + new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), + HiveRelMdDistribution.SOURCE, + HiveRelMdCollation.SOURCE, + HiveRelMdPredicates.SOURCE, + JaninoRelMetadataProvider.DEFAULT))); + + metadataProvider.register(HIVE_REL_NODE_CLASSES); + + return metadataProvider; } - // Get max split size for HiveRelMdParallelism - final Double maxSplitSize = (double) HiveConf.getLongVar( - this.hiveConf, - HiveConf.ConfVars.MAPREDMAXSPLITSIZE); - - // Return MD provider - return ChainedRelMetadataProvider.of(ImmutableList - .of( - HiveRelMdDistinctRowCount.SOURCE, - new HiveRelMdCost(cm).getMetadataProvider(), - HiveRelMdSelectivity.SOURCE, - HiveRelMdRowCount.SOURCE, - HiveRelMdUniqueKeys.SOURCE, - HiveRelMdColumnUniqueness.SOURCE, - HiveRelMdSize.SOURCE, - HiveRelMdMemory.SOURCE, - new HiveRelMdParallelism(maxSplitSize).getMetadataProvider(), - HiveRelMdDistribution.SOURCE, - HiveRelMdCollation.SOURCE, - HiveRelMdPredicates.SOURCE, - DefaultRelMetadataProvider.INSTANCE)); + return DEFAULT; } + public RelMetadataProvider getMetadataProvider() { + return metadataProvider; + } + + /** + * This method can be called at startup time to pre-register all the + * additional Hive classes (compared to Calcite core classes) that may + * be visited during the planning phase. 
+ */ + public static void initializeMetadataProviderClass() { + // This will register the classes in the default Janino implementation + JaninoRelMetadataProvider.DEFAULT.register( + HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES); + // This will register the classes in the default Hive implementation + DEFAULT.register(HiveDefaultRelMetadataProvider.HIVE_REL_NODE_CLASSES); + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java index ed6659c6cc..bae0d53155 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java @@ -24,6 +24,7 @@ import java.util.SortedMap; import java.util.TreeMap; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.calcite.linq4j.Ord; import org.apache.calcite.plan.RelOptCost; import org.apache.calcite.plan.RelOptRuleCall; @@ -55,6 +56,8 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Planner rule that pushes an @@ -63,20 +66,16 @@ */ public class HiveAggregateJoinTransposeRule extends AggregateJoinTransposeRule { - /** Extended instance of the rule that can push down aggregate functions. */ - public static final HiveAggregateJoinTransposeRule INSTANCE = - new HiveAggregateJoinTransposeRule(HiveAggregate.class, HiveJoin.class, - HiveRelFactories.HIVE_BUILDER, true); + private static final Logger LOG = LoggerFactory.getLogger(HiveAggregateJoinTransposeRule.class); private final boolean allowFunctions; + private final AtomicInteger noColsMissingStats; /** Creates an AggregateJoinTransposeRule that may push down functions. 
*/ - private HiveAggregateJoinTransposeRule(Class aggregateClass, - Class joinClass, - RelBuilderFactory relBuilderFactory, - boolean allowFunctions) { - super(aggregateClass, joinClass, relBuilderFactory, true); - this.allowFunctions = allowFunctions; + public HiveAggregateJoinTransposeRule(AtomicInteger noColsMissingStats) { + super(HiveAggregate.class, HiveJoin.class, HiveRelFactories.HIVE_BUILDER, true); + this.allowFunctions = true; + this.noColsMissingStats = noColsMissingStats; } @Override @@ -288,11 +287,21 @@ public void onMatch(RelOptRuleCall call) { } // Make a cost based decision to pick cheaper plan - RelNode r = relBuilder.build(); - RelOptCost afterCost = mq.getCumulativeCost(r); - RelOptCost beforeCost = mq.getCumulativeCost(aggregate); - if (afterCost.isLt(beforeCost)) { - call.transformTo(r); + try { + RelNode r = relBuilder.build(); + RelOptCost afterCost = mq.getCumulativeCost(r); + RelOptCost beforeCost = mq.getCumulativeCost(aggregate); + if (afterCost.isLt(beforeCost)) { + call.transformTo(r); + } + } catch (Exception e) { + boolean isMissingStats = noColsMissingStats.get() > 0; + if (isMissingStats) { + LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO"); + noColsMissingStats.set(0); + } else { + throw e; + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java new file mode 100644 index 0000000000..ac050df30b --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveFieldTrimmerRule.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer.calcite.rules; + +import org.apache.calcite.plan.RelOptRule; +import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.plan.hep.HepRelVertex; +import org.apache.calcite.rel.RelNode; +import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; + + +/** + * Rule that triggers the field trimmer on the root of a plan. + */ +public class HiveFieldTrimmerRule extends RelOptRule { + + private static final HepProgram PROGRAM = new HepProgramBuilder() + .addRuleInstance(HiveHepExtractRelNodeRule.INSTANCE) + .build(); + + private final boolean fetchStats; + private boolean triggered; + + public HiveFieldTrimmerRule(boolean fetchStats) { + super(operand(RelNode.class, any()), + HiveRelFactories.HIVE_BUILDER, "HiveFieldTrimmerRule"); + this.fetchStats = fetchStats; + triggered = false; + } + + @Override + public void onMatch(RelOptRuleCall call) { + if (triggered) { + // Bail out + return; + } + + RelNode node = call.rel(0); + final HepRelVertex root = (HepRelVertex) call.getPlanner().getRoot(); + if (root.getCurrentRel() != node) { + // Bail out + return; + } + // The node is the root, release the kraken! 
+ final HepPlanner tmpPlanner = new HepPlanner(PROGRAM); + tmpPlanner.setRoot(node); + node = tmpPlanner.findBestExp(); + final HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, + relBuilderFactory.create(node.getCluster(), null), fetchStats); + call.transformTo(fieldTrimmer.trim(node)); + triggered = true; + } + + + /** + * The goal of this rule is to extract the RelNode from the + * HepRelVertex node so the trimmer can be applied correctly. + */ + private static class HiveHepExtractRelNodeRule extends RelOptRule { + + private static final HiveHepExtractRelNodeRule INSTANCE = + new HiveHepExtractRelNodeRule(); + + private HiveHepExtractRelNodeRule() { + super(operand(HepRelVertex.class, any())); + } + + @Override + public void onMatch(RelOptRuleCall call) { + final HepRelVertex rel = call.rel(0); + call.transformTo(rel.getCurrentRel()); + } + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index f5a1c74671..4431bf1f9e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -70,7 +70,6 @@ import org.apache.calcite.rel.core.TableScan; import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; -import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider; import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataQuery; @@ -179,6 +178,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveDruidRules; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExceptRewriteRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveExpandDistinctAggregatesRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFieldTrimmerRule; import 
org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterAggregateTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterJoinRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveFilterProjectTSTransposeRule; @@ -537,7 +537,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept if (!explainConfig.isCboJoinCost()) { // Include cost as provided by Calcite newPlan.getCluster().invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT); } if (explainConfig.isFormatted()) { this.ctx.setCalcitePlan(HiveRelOptUtil.toJsonString(newPlan)); @@ -1768,17 +1768,21 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create and set MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); - RelMetadataQuery.THREAD_PROVIDERS.set( - JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider())); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(mdProvider.getMetadataProvider())); //Remove subquery - LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan before removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + } calciteGenPlan = hepPlan(calciteGenPlan, false, mdProvider.getMetadataProvider(), null, - new HiveSubQueryRemoveRule(conf)); - LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); - + HepMatchOrder.DEPTH_FIRST, new HiveSubQueryRemoveRule(conf)); + if (LOG.isDebugEnabled()) { + LOG.debug("Plan just after removing subquery:\n" + RelOptUtil.toString(calciteGenPlan)); + } calciteGenPlan = HiveRelDecorrelator.decorrelateQuery(calciteGenPlan); - LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + if 
(LOG.isDebugEnabled()) { + LOG.debug("Plan after decorrelation:\n" + RelOptUtil.toString(calciteGenPlan)); + } // Validate query materialization for query results caching. This check needs // to occur before constant folding, which may remove some function calls @@ -1796,7 +1800,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // 2. Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, - mdProvider.getMetadataProvider(), executorProvider); + mdProvider.getMetadataProvider(), executorProvider); // 3. Materialized view based rewriting // We disable it for CTAS and MV creation queries (trying to avoid any problem @@ -1807,183 +1811,20 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu calcitePreCboPlan, mdProvider.getMetadataProvider(), executorProvider); } - // Get rid of sq_count_check if group by key is constant - if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calcitePreCboPlan = - hepPlan(calcitePreCboPlan, false, mdProvider.getMetadataProvider(), null, - HiveRemoveSqCountCheck.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removing sq_count_check UDF "); - } - - // 4. 
Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with // the rest of optimizations if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - - // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin - calcitePreCboPlan = hepPlan(calcitePreCboPlan, true, mdProvider.getMetadataProvider(), executorProvider, - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN, - HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, HiveProjectMergeRule.INSTANCE); - try { - List list = Lists.newArrayList(); - list.add(mdProvider.getMetadataProvider()); - RelTraitSet desiredTraits = optCluster - .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY); - - HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP); - hepPgmBldr.addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); - hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER)); - - HepProgram hepPgm = hepPgmBldr.build(); - HepPlanner hepPlanner = new HepPlanner(hepPgm); - - hepPlanner.registerMetadataProviders(list); - RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - optCluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); - - RelNode rootRel = calcitePreCboPlan; - hepPlanner.setRoot(rootRel); - if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { - rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); - } - hepPlanner.setRoot(rootRel); - - calciteOptimizedPlan = hepPlanner.findBestExp(); - } catch (Exception e) { - boolean isMissingStats = noColsMissingStats.get() > 0; - if (isMissingStats) { - LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO"); - noColsMissingStats.set(0); - calciteOptimizedPlan = calcitePreCboPlan; - 
disableSemJoinReordering = false; - } else { - throw e; - } - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering"); + calciteOptimizedPlan = applyJoinOrderingTransform(calcitePreCboPlan, + mdProvider.getMetadataProvider(), executorProvider); } else { calciteOptimizedPlan = calcitePreCboPlan; disableSemJoinReordering = false; } - // 5. Run other optimizations that do not need stats - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE, - HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, HiveJoinCommuteRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Optimizations without stats 1"); - - // 6. Run aggregate-join transpose (cost based) - // If it failed because of missing stats, we continue with - // the rest of optimizations - if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - try { - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, HiveAggregateJoinTransposeRule.INSTANCE); - } catch (Exception e) { - boolean isMissingStats = noColsMissingStats.get() > 0; - if (isMissingStats) { - LOG.warn("Missing column stats (see previous messages), skipping aggregate-join transpose in CBO"); - noColsMissingStats.set(0); - } else { - throw e; - } - } - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Aggregate join transpose"); - } - - // 7.convert Join + GBy to semijoin - // run this rule at later stages, since many calcite rules cant deal with semijoin - if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) { - perfLogger.PerfLogBegin(this.getClass().getName(), 
PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HiveSemiJoinRule.INSTANCE_PROJECT, HiveSemiJoinRule.INSTANCE_PROJECT_SWAPPED, HiveSemiJoinRule.INSTANCE_AGGREGATE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Semijoin conversion"); - } - - // 8. convert SemiJoin + GBy to SemiJoin - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HiveRemoveGBYSemiJoinRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Removal of gby from semijoin"); - - - // 9. Run rule to fix windowing issue when it is done over - // aggregation columns (HIVE-10627) - if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Window fixing rule"); - } - - // 10. 
Apply Druid transformation rules - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, - HiveDruidRules.FILTER_DATE_RANGE_RULE, - HiveDruidRules.FILTER, HiveDruidRules.PROJECT_FILTER_TRANSPOSE, - HiveDruidRules.AGGREGATE_FILTER_TRANSPOSE, - HiveDruidRules.AGGREGATE_PROJECT, - HiveDruidRules.PROJECT, - HiveDruidRules.EXPAND_SINGLE_DISTINCT_AGGREGATES_DRUID_RULE, - HiveDruidRules.AGGREGATE, - HiveDruidRules.POST_AGGREGATION_PROJECT, - HiveDruidRules.FILTER_AGGREGATE_TRANSPOSE, - HiveDruidRules.FILTER_PROJECT_TRANSPOSE, - HiveDruidRules.HAVING_FILTER_RULE, - HiveDruidRules.SORT_PROJECT_TRANSPOSE, - HiveDruidRules.SORT, - HiveDruidRules.PROJECT_SORT_TRANSPOSE - ); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Druid transformation rules"); - - if (conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_PUSHDOWN)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null, - HepMatchOrder.TOP_DOWN, - JDBCExtractJoinFilterRule.INSTANCE, - JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_JOIN, - JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_CONVERTER, - JDBCFilterJoinRule.INSTANCE, - JDBCJoinPushDownRule.INSTANCE, JDBCUnionPushDownRule.INSTANCE, - JDBCFilterPushDownRule.INSTANCE, JDBCProjectPushDownRule.INSTANCE, - JDBCAggregationPushDownRule.INSTANCE, JDBCSortPushDownRule.INSTANCE - ); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: JDBC transformation rules"); - } - - // 11. Run rules to aid in translation from Calcite tree to Hive tree - if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - // 12.1. 
Merge join into multijoin operators (if possible) - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, - HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER, - HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER, - HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE); - // The previous rules can pull up projections through join operators, - // thus we run the field trimmer again to push them back down - fieldTrimmer = new HiveRelFieldTrimmer(null, - HiveRelFactories.HIVE_BUILDER.create(optCluster, null)); - calciteOptimizedPlan = fieldTrimmer.trim(calciteOptimizedPlan); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, - new ProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER)); - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), null, - HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID, - HiveProjectFilterPullUpConstantsRule.INSTANCE); - - // 11.2. Introduce exchange operators below join/multijoin operators - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), null, - HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, - HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Translation from Calcite tree to Hive tree"); - } + // 5. 
Apply post-join order optimizations + calciteOptimizedPlan = applyPostJoinOrderingTransform(calciteOptimizedPlan, + mdProvider.getMetadataProvider(), executorProvider); if (LOG.isDebugEnabled() && !conf.getBoolVar(ConfVars.HIVE_IN_TEST)) { LOG.debug("CBO Planning details:\n"); @@ -2018,37 +1859,26 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT); final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN); + final HepProgramBuilder program = new HepProgramBuilder(); + //0. SetOp rewrite - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, null, HepMatchOrder.BOTTOM_UP, + generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP, HiveProjectOverIntersectRemoveRule.INSTANCE, HiveIntersectMergeRule.INSTANCE, HiveUnionMergeRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: HiveProjectOverIntersectRemoveRule, HiveIntersectMerge and HiveUnionMergeRule rules"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, HiveIntersectRewriteRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: HiveIntersectRewrite rule"); - - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, HiveExceptRewriteRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: HiveExceptRewrite rule"); //1. 
Distinct aggregate rewrite // Run this optimization early, since it is expanding the operator pipeline. if (!conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr") && conf.getBoolVar(HiveConf.ConfVars.HIVEOPTIMIZEDISTINCTREWRITE)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // Its not clear, if this rewrite is always performant on MR, since extra map phase // introduced for 2nd MR job may offset gains of this multi-stage aggregation. // We need a cost model for MR to enable this on MR. - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveExpandDistinctAggregatesRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite"); + generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, + HiveExpandDistinctAggregatesRule.INSTANCE); } // 2. Try factoring out common filter elements & separating deterministic @@ -2056,11 +1886,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // add on-clauses for old style Join Syntax // Ex: select * from R1 join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, HepMatchOrder.ARBITRARY, + generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, new HivePreFilteringRule(maxCNFNodeCount)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); // 3. 
Run exhaustive PPD, add not null filters, transitive inference, // constant propagation, constant folding @@ -2104,32 +1931,26 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv rules.add(HiveSortLimitPullUpConstantsRule.INSTANCE); rules.add(HiveUnionPullUpConstantsRule.INSTANCE); rules.add(HiveAggregatePullUpConstantsRule.INSTANCE); - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, - rules.toArray(new RelOptRule[rules.size()])); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); + generatePartialProgram(program,true, HepMatchOrder.BOTTOM_UP, + rules.toArray(new RelOptRule[rules.size()])); // 4. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. // Ex: select * from R1 left outer join R2 where ((R1.x=R2.x) and R1.y<10) or // ((R1.x=R2.x) and R1.z=10)) and rand(1) < 0.1 order by R1.x limit 10 if (conf.getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE)) { - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); // This should be a cost based decision, but till we enable the extended cost // model, we will use the given value for the variable final float reductionProportion = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE); final long reductionTuples = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HiveSortMergeRule.INSTANCE, - HiveSortProjectTransposeRule.INSTANCE, HiveSortJoinReduceRule.INSTANCE, - HiveSortUnionReduceRule.INSTANCE); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, + 
generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, + HiveSortMergeRule.INSTANCE, HiveSortProjectTransposeRule.INSTANCE, + HiveSortJoinReduceRule.INSTANCE, HiveSortUnionReduceRule.INSTANCE); + generatePartialProgram(program, true, HepMatchOrder.BOTTOM_UP, new HiveSortRemoveRule(reductionProportion, reductionTuples), HiveProjectSortTransposeRule.INSTANCE); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Push down limit through outer join"); } // 5. Push Down Semi Joins @@ -2141,32 +1962,34 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); */ // 6. Apply Partition Pruning - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, false, mdProvider, executorProvider, new HivePartitionPruneRule(conf)); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Partition Pruning"); + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + new HivePartitionPruneRule(conf)); // 7. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null, - HiveRelFactories.HIVE_BUILDER.create(cluster, null), - profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)); - basePlan = fieldTrimmer.trim(basePlan); - perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Projection Pruning"); + generatePartialProgram(program,false, HepMatchOrder.TOP_DOWN, + new HiveFieldTrimmerRule(profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING))); // 8. 
Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into // storage (incase there are filters on non partition cols). This only // matches FIL-PROJ-TS // Also merge, remove and reduce Project if possible - perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); - basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, + generatePartialProgram(program,true, HepMatchOrder.TOP_DOWN, HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID, HiveProjectFilterPullUpConstantsRule.INSTANCE, HiveProjectMergeRule.INSTANCE, ProjectRemoveRule.INSTANCE, HiveSortMergeRule.INSTANCE); + + // 9. Get rid of sq_count_check if group by key is constant + if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) { + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + HiveRemoveSqCountCheck.INSTANCE); + } + + // Trigger program + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Rerun PPD"); + "Calcite: Prejoin ordering transformation"); return basePlan; } @@ -2243,7 +2066,7 @@ private RelNode copyNodeScan(RelNode scan) { // Use Calcite cost model for view rewriting optCluster.invalidateMetadataQuery(); - RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE)); + RelMetadataQuery.THREAD_PROVIDERS.set(JaninoRelMetadataProvider.DEFAULT); // Add materializations to planner for (RelOptMaterialization materialization : materializations) { @@ -2302,6 +2125,159 @@ private RelNode copyNodeScan(RelNode scan) { return basePlan; } + /** + * Perform join reordering optimization. 
+ * + * @param basePlan + * original plan + * @param mdProvider + * meta data provider + * @param executorProvider + * executor + * @return + */ + private RelNode applyJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) { + PerfLogger perfLogger = SessionState.getPerfLogger(); + + final HepProgramBuilder program = new HepProgramBuilder(); + // Remove Projects between Joins so that JoinToMultiJoinRule can merge them to MultiJoin + generatePartialProgram(program,true, HepMatchOrder.BOTTOM_UP, + HiveJoinProjectTransposeRule.LEFT_PROJECT_BTW_JOIN, HiveJoinProjectTransposeRule.RIGHT_PROJECT_BTW_JOIN, + HiveProjectMergeRule.INSTANCE); + // Join reordering + generatePartialProgram(program, false, HepMatchOrder.BOTTOM_UP, + new JoinToMultiJoinRule(HiveJoin.class), new LoptOptimizeJoinRule(HiveRelFactories.HIVE_BUILDER)); + + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + RelNode calciteOptimizedPlan; + try { + calciteOptimizedPlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider); + } catch (Exception e) { + boolean isMissingStats = noColsMissingStats.get() > 0; + if (isMissingStats) { + LOG.warn("Missing column stats (see previous messages), skipping join reordering in CBO"); + noColsMissingStats.set(0); + calciteOptimizedPlan = basePlan; + disableSemJoinReordering = false; + } else { + throw e; + } + } + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Join Reordering"); + + return calciteOptimizedPlan; + } + + /** + * Perform join reordering post-optimization. 
+ * + * @param basePlan + * original plan + * @param mdProvider + * meta data provider + * @param executorProvider + * executor + * @return + */ + private RelNode applyPostJoinOrderingTransform(RelNode basePlan, RelMetadataProvider mdProvider, RexExecutor executorProvider) { + PerfLogger perfLogger = SessionState.getPerfLogger(); + + final HepProgramBuilder program = new HepProgramBuilder(); + + // 1. Run other optimizations that do not need stats + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + ProjectRemoveRule.INSTANCE, HiveUnionMergeRule.INSTANCE, + HiveAggregateProjectMergeRule.INSTANCE, HiveProjectMergeRule.INSTANCE_NO_FORCE, + HiveJoinCommuteRule.INSTANCE); + + // 2. Run aggregate-join transpose (cost based) + // If it failed because of missing stats, we continue with + // the rest of optimizations + if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) { + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + new HiveAggregateJoinTransposeRule(noColsMissingStats)); + } + + // 3. Convert Join + GBy to semijoin + // Run this rule at later stages, since many calcite rules cant deal with semijoin + if (conf.getBoolVar(ConfVars.SEMIJOIN_CONVERSION)) { + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + HiveSemiJoinRule.INSTANCE_PROJECT, HiveSemiJoinRule.INSTANCE_PROJECT_SWAPPED, + HiveSemiJoinRule.INSTANCE_AGGREGATE); + } + + // 4. convert SemiJoin + GBy to SemiJoin + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + HiveRemoveGBYSemiJoinRule.INSTANCE); + + // 5. Run rule to fix windowing issue when it is done over + // aggregation columns (HIVE-10627) + if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + HiveWindowingFixRule.INSTANCE); + } + + // 6. 
Apply Druid transformation rules + generatePartialProgram(program,false, HepMatchOrder.DEPTH_FIRST, + HiveDruidRules.FILTER_DATE_RANGE_RULE, + HiveDruidRules.FILTER, HiveDruidRules.PROJECT_FILTER_TRANSPOSE, + HiveDruidRules.AGGREGATE_FILTER_TRANSPOSE, + HiveDruidRules.AGGREGATE_PROJECT, + HiveDruidRules.PROJECT, + HiveDruidRules.EXPAND_SINGLE_DISTINCT_AGGREGATES_DRUID_RULE, + HiveDruidRules.AGGREGATE, + HiveDruidRules.POST_AGGREGATION_PROJECT, + HiveDruidRules.FILTER_AGGREGATE_TRANSPOSE, + HiveDruidRules.FILTER_PROJECT_TRANSPOSE, + HiveDruidRules.HAVING_FILTER_RULE, + HiveDruidRules.SORT_PROJECT_TRANSPOSE, + HiveDruidRules.SORT, + HiveDruidRules.PROJECT_SORT_TRANSPOSE); + + // 7. Apply JDBC transformation rules + if (conf.getBoolVar(ConfVars.HIVE_ENABLE_JDBC_PUSHDOWN)) { + generatePartialProgram(program,true, HepMatchOrder.TOP_DOWN, + JDBCExtractJoinFilterRule.INSTANCE, + JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_JOIN, + JDBCAbstractSplitFilterRule.SPLIT_FILTER_ABOVE_CONVERTER, + JDBCFilterJoinRule.INSTANCE, + JDBCJoinPushDownRule.INSTANCE, JDBCUnionPushDownRule.INSTANCE, + JDBCFilterPushDownRule.INSTANCE, JDBCProjectPushDownRule.INSTANCE, + JDBCAggregationPushDownRule.INSTANCE, JDBCSortPushDownRule.INSTANCE); + } + + // 8. Run rules to aid in translation from Calcite tree to Hive tree + if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { + // 8.1. 
Merge join into multijoin operators (if possible) + generatePartialProgram(program,true, HepMatchOrder.BOTTOM_UP, + HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, + HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER, + HiveJoinProjectTransposeRule.RIGHT_PROJECT_INCLUDE_OUTER, + HiveJoinToMultiJoinRule.INSTANCE, HiveProjectMergeRule.INSTANCE); + // The previous rules can pull up projections through join operators, + // thus we run the field trimmer again to push them back down + generatePartialProgram(program,false, HepMatchOrder.TOP_DOWN, + new HiveFieldTrimmerRule(false)); + generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, + ProjectRemoveRule.INSTANCE, new ProjectMergeRule(false, HiveRelFactories.HIVE_BUILDER)); + generatePartialProgram(program, true, HepMatchOrder.TOP_DOWN, + HiveFilterProjectTSTransposeRule.INSTANCE, HiveFilterProjectTSTransposeRule.INSTANCE_DRUID, + HiveProjectFilterPullUpConstantsRule.INSTANCE); + + // 8.2. Introduce exchange operators below join/multijoin operators + generatePartialProgram(program, false, HepMatchOrder.DEPTH_FIRST, + HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); + } + + // Trigger program + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + basePlan = executeProgram(basePlan, program.build(), mdProvider, executorProvider); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Postjoin ordering transformation"); + + return basePlan; + } + private List getTablesUsed(RelNode plan) { List tablesUsed = new ArrayList<>(); new RelVisitor() { @@ -2324,44 +2300,45 @@ public void visit(RelNode node, int ordinal, RelNode parent) { * @param followPlanChanges * @param mdProvider * @param executorProvider + * @param order * @param rules * @return optimized RelNode */ + @Deprecated private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, - RelMetadataProvider mdProvider, RexExecutor 
executorProvider, RelOptRule... rules) { - return hepPlan(basePlan, followPlanChanges, mdProvider, executorProvider, - HepMatchOrder.TOP_DOWN, rules); + RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order, + RelOptRule... rules) { + final HepProgramBuilder programBuilder = new HepProgramBuilder(); + generatePartialProgram(programBuilder, followPlanChanges, order, rules); + return executeProgram(basePlan, programBuilder.build(), mdProvider, executorProvider); } /** - * Run the HEP Planner with the given rule set. + * Generate a HEP program with the given rule set. * - * @param basePlan - * @param followPlanChanges - * @param mdProvider - * @param executorProvider + * @param isCollection * @param order * @param rules - * @return optimized RelNode + * @return HEP program */ - private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, - RelMetadataProvider mdProvider, RexExecutor executorProvider, HepMatchOrder order, + private void generatePartialProgram(HepProgramBuilder programBuilder, boolean isCollection, HepMatchOrder order, RelOptRule... rules) { - - RelNode optimizedRelNode = basePlan; - HepProgramBuilder programBuilder = new HepProgramBuilder(); - if (followPlanChanges) { - programBuilder.addMatchOrder(order); - programBuilder = programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); + programBuilder.addMatchOrder(order); + if (isCollection) { + programBuilder.addRuleCollection(ImmutableList.copyOf(rules)); } else { - // TODO: Should this be also TOP_DOWN? 
- for (RelOptRule r : rules) + for (RelOptRule r : rules) { programBuilder.addRuleInstance(r); + } } + } + + private RelNode executeProgram(RelNode basePlan, HepProgram program, + RelMetadataProvider mdProvider, RexExecutor executorProvider) { // Create planner and copy context - HepPlanner planner = new HepPlanner(programBuilder.build(), - basePlan.getCluster().getPlanner().getContext()); + HepPlanner planner = new HepPlanner(program, + basePlan.getCluster().getPlanner().getContext()); List list = Lists.newArrayList(); list.add(mdProvider); @@ -2378,9 +2355,8 @@ private RelNode hepPlan(RelNode basePlan, boolean followPlanChanges, } planner.setRoot(basePlan); - optimizedRelNode = planner.findBestExp(); - return optimizedRelNode; + return planner.findBestExp(); } @SuppressWarnings("nls") @@ -5148,6 +5124,15 @@ private QBParseInfo getQBParseInfo(QB qb) throws CalciteSemanticException { } } + /** + * This method can be called at startup time to pre-register all the + * additional Hive classes (compared to Calcite core classes) that may + * be visited during the planning phase. 
+ */ + public static void initializeMetadataProviderClass() { + HiveDefaultRelMetadataProvider.initializeMetadataProviderClass(); + } + private enum TableType { DRUID, NATIVE, diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out index 118d23b577..a96ecf7fa9 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query14.q.out @@ -242,7 +242,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) HiveFilter(condition=[=($3, 3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) HiveUnion(all=[true]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) @@ -335,7 +335,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) HiveFilter(condition=[=($3, 3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) HiveUnion(all=[true]) HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) @@ -428,7 +428,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) HiveFilter(condition=[=($3, 3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) + HiveProject(brand_id=[$0], class_id=[$1], category_id=[$2], $f3=[$3]) HiveUnion(all=[true]) 
HiveProject(i_brand_id=[$0], i_class_id=[$1], i_category_id=[$2], $f3=[$3]) HiveAggregate(group=[{4, 5, 6}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out index e5b0d19715..aac7eedab3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query23.q.out @@ -122,7 +122,7 @@ CBO PLAN: HiveSortLimit(fetch=[100]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[sum($0)]) - HiveProject($f0=[$0]) + HiveProject(sales=[$0]) HiveUnion(all=[true]) HiveProject($f0=[*(CAST($4):DECIMAL(10, 0), $5)]) HiveSemiJoin(condition=[=($3, $8)], joinType=[inner]) @@ -169,7 +169,7 @@ HiveSortLimit(fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(substr=[$0], i_item_sk=[$1], d_date=[$2], $f3=[$3]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveFilter(condition=[>($3, 4)]) HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3]) HiveAggregate(group=[{3, 4, 5}], agg#0=[count()]) @@ -228,7 +228,7 @@ HiveSortLimit(fetch=[100]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(substr=[$0], i_item_sk=[$1], d_date=[$2], $f3=[$3]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveFilter(condition=[>($3, 4)]) HiveProject(substr=[$2], i_item_sk=[$1], d_date=[$0], $f3=[$3]) HiveAggregate(group=[{3, 4, 5}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out index bd68baa23b..4e2fb987d3 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out +++ 
b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query30.q.out @@ -79,7 +79,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], sort4=[$4], sort5= HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($8, _UTF-16LE'IL')]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(wr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(wr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out index f8e31a23aa..04aece5371 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query31.q.out @@ -154,7 +154,7 @@ HiveProject(ca_county=[$8], d_year=[CAST(2000):INTEGER], web_q1_q2_increase=[/($ HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($10, 2), =($6, 2000))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_county=[$0], $f1=[$1], ca_county0=[$2], $f10=[$3], ca_county1=[$4], $f11=[$5]) + HiveProject($f0=[$0], $f1=[$1], $f00=[$2], $f10=[$3], $f01=[$4], $f11=[$5]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ca_county=[$0], $f1=[$1]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out index e8824ddf54..a02e165ccd 100644 --- 
a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query33.q.out @@ -162,9 +162,9 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) - HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(i_manufact_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1]) HiveUnion(all=[true]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out index 987f8d3eea..7de90ed452 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query34.q.out @@ -76,7 +76,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) HiveFilter(condition=[BETWEEN(false, $2, 15, 20)]) HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) HiveAggregate(group=[{1, 4}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out index 01e87d2827..c3ca7bbcc2 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query38.q.out @@ -58,10 
+58,10 @@ CBO PLAN: HiveSortLimit(fetch=[100]) HiveProject($f0=[$0]) HiveAggregate(group=[{}], agg#0=[count()]) - HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveFilter(condition=[=($3, 3)]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count($3)]) - HiveProject(c_last_name=[$0], c_first_name=[$1], d_date=[$2], $f3=[$3]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], $f3=[$3]) HiveUnion(all=[true]) HiveProject(c_last_name=[$1], c_first_name=[$0], d_date=[$2], $f3=[$3]) HiveAggregate(group=[{0, 1, 2}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out index e33203d93c..d4e288f636 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query54.q.out @@ -161,7 +161,7 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject($f0=[+($3, 1)]) HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1], ca_address_sk=[$2], ca_county=[$3], ca_state=[$4], s_county=[$5], s_state=[$6]) + HiveProject($f0=[$0], $f1=[$1], ca_address_sk=[$2], ca_county=[$3], ca_state=[$4], s_county=[$5], s_state=[$6]) HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_current_addr_sk=[$1]) HiveAggregate(group=[{0, 1}]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out index a13e599b34..24cb6ebb7a 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query56.q.out @@ -148,9 
+148,9 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) - HiveProject(i_item_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(i_item_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1]) HiveUnion(all=[true]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out index ea098f7567..90e147a0ad 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query60.q.out @@ -168,9 +168,9 @@ POSTHOOK: Input: default@web_sales POSTHOOK: Output: hdfs://### HDFS PATH ### CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) - HiveProject(i_item_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1]) HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(i_item_id=[$0], $f1=[$1]) + HiveProject($f0=[$0], $f1=[$1]) HiveUnion(all=[true]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveAggregate(group=[{1}], agg#0=[sum($7)]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out index 38af7c0d35..da00e35c15 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query73.q.out @@ -70,7 +70,7 @@ HiveSortLimit(sort0=[$5], dir0=[DESC-nulls-last]) HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(c_customer_sk=[$0], c_salutation=[$7], c_first_name=[$8], c_last_name=[$9], c_preferred_cust_flag=[$10]) HiveTableScan(table=[[default, customer]], table:alias=[customer]) - 
HiveProject(ss_ticket_number=[$0], ss_customer_sk=[$1], $f2=[$2]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2]) HiveFilter(condition=[BETWEEN(false, $2, 1, 5)]) HiveProject(ss_ticket_number=[$1], ss_customer_sk=[$0], $f2=[$2]) HiveAggregate(group=[{1, 4}], agg#0=[count()]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out index de1b133006..abc5af3669 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query78.q.out @@ -135,7 +135,7 @@ HiveSortLimit(fetch=[100]) HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], ratio=[round(/(CAST($2):DOUBLE, CAST(CASE(AND(IS NOT NULL($7), IS NOT NULL($11)), +($7, $11), 1)):DOUBLE), 2)], store_qty=[$2], store_wholesale_cost=[$3], store_sales_price=[$4], other_chan_qty=[+(CASE(IS NOT NULL($7), $7, 0), CASE(IS NOT NULL($11), $11, 0))], other_chan_wholesale_cost=[+(CASE(IS NOT NULL($8), $8, 0), CASE(IS NOT NULL($12), $12, 0))], other_chan_sales_price=[+(CASE(IS NOT NULL($9), $9, 0), CASE(IS NOT NULL($13), $13, 0))], ss_qty=[$2], ss_wc=[$3], ss_sp=[$4], (tok_function round (/ (tok_table_or_col ss_qty) (tok_function coalesce (+ (tok_table_or_col ws_qty) (tok_table_or_col cs_qty)) 1)) 2)=[round(/(CAST($2):DOUBLE, CAST(CASE(AND(IS NOT NULL($7), IS NOT NULL($11)), +($7, $11), 1)):DOUBLE), 2)]) HiveFilter(condition=[CASE(IS NOT NULL($11), >($11, 0), false)]) HiveJoin(condition=[=($10, $1)], joinType=[left], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4], ws_item_sk=[$5], ws_bill_customer_sk=[$6], $f20=[$7], $f30=[$8], $f40=[$9]) + HiveProject($f1=[$0], $f2=[$1], $f2_0=[$2], $f3=[$3], $f4=[$4], $f10=[$5], $f20=[$6], $f2_00=[$7], $f30=[$8], $f40=[$9]) HiveFilter(condition=[CASE(IS NOT NULL($7), >($7, 0), false)]) HiveJoin(condition=[AND(=($5, $0), =($6, $1))], 
joinType=[left], algorithm=[none], cost=[not available]) HiveProject(ss_item_sk=[$0], ss_customer_sk=[$1], $f2=[$2], $f3=[$3], $f4=[$4]) diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out index 98066643bf..77f9ccb0e0 100644 --- a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out +++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query81.q.out @@ -80,7 +80,7 @@ HiveProject(c_customer_id=[$0], c_salutation=[$1], c_first_name=[$2], c_last_nam HiveProject(ca_address_sk=[$0], ca_street_number=[$2], ca_street_name=[$3], ca_street_type=[$4], ca_suite_number=[$5], ca_city=[$6], ca_county=[$7], ca_zip=[$9], ca_country=[$10], ca_gmt_offset=[$11], ca_location_type=[$12]) HiveFilter(condition=[=($8, _UTF-16LE'IL')]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(cr_returning_customer_sk=[$0], ca_state=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) + HiveProject($f0=[$0], $f1=[$1], $f2=[$2], _o__c0=[$3], ctr_state=[$4]) HiveJoin(condition=[AND(=($1, $4), >($2, $3))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(cr_returning_customer_sk=[$1], ca_state=[$0], $f2=[$2]) HiveAggregate(group=[{1, 3}], agg#0=[sum($5)]) diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index f9fb854106..452b8d823b 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -76,6 +76,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; import org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.events.NotificationEventPoll; +import org.apache.hadoop.hive.ql.parse.CalcitePlanner; import org.apache.hadoop.hive.ql.plan.mapper.StatsSources; 
import org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider; import org.apache.hadoop.hive.ql.security.authorization.PolicyProviderContainer; @@ -241,6 +242,9 @@ public void run() { LlapRegistryService.getClient(hiveConf); } + // Initialize metadata provider class + CalcitePlanner.initializeMetadataProviderClass(); + try { sessionHive = Hive.get(hiveConf); } catch (HiveException e) {