// Patch overview (reviewer notes placed ahead of the first "diff --git" header,
// so they are outside every hunk).
//
// ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
//   * Adds the import java.util.EnumSet and drops the import of
//     org.apache.calcite.rel.rules.AggregateJoinTransposeRule.
//   * Adds the field profilesCBO next to runCBO. genOPTree() assigns it from
//     obtainCBOProfiles(queryProperties) right after runCBO is computed, i.e.
//     before the "if (runCBO)" check, so it is set whether or not CBO runs.
//   * canHandleQbForCbo(): removes the hasEnoughJoins term (non-top-level QB,
//     or join count > 1, or in-test, or distinct expressions present) from the
//     "Ok to run CBO" condition, and removes the "has too few joins; " message.
//   * Adds private static obtainCBOProfiles(): starts from an empty EnumSet of
//     ExtendedCBOProfile, adds JOIN_REORDERING when getJoinCount() > 1 and
//     WINDOWING_POSTPROCESSING when hasWindowing() is true, then returns it.
//   * Adds the private enum ExtendedCBOProfile with exactly those two constants.
//   * apply(): calciteOptimizedPlan is first set to calcitePreCboPlan.copy(...)
//     using that plan's own trait set and inputs. The HepPlanner join-reordering
//     program and the HiveJoinCommuteRule pass now run only when profilesCBO
//     contains JOIN_REORDERING; HiveWindowingFixRule runs only when it contains
//     WINDOWING_POSTPROCESSING. Step comments are renumbered
//     (old 4, 5, 6, 6.1, 6.2 become 3.2, 4.1, 4.2, 4.2.1, 4.2.2).
//
// ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
//   * Removes the protected static helper distinctExprsExists(QB); the only
//     caller visible in this patch is the hasEnoughJoins expression removed above.
//
// NOTE(review): in this copy the lines of each hunk are run together on a few
// long lines. The original line breaks would have to be restored before the
// hunk line counts (e.g. "-864,61 +883,70") can be matched by a patch tool.
// NOTE(review): generic type arguments look stripped in this copy (raw
// "EnumSet", "List list", "TreeSet ks") -- presumably an extraction artifact;
// confirm against the original patch before treating them as raw-type usage.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 9c731b8..c896723 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.BitSet; import java.util.Collections; +import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -63,7 +64,6 @@ import org.apache.calcite.rel.metadata.CachingRelMetadataProvider; import org.apache.calcite.rel.metadata.ChainedRelMetadataProvider; import org.apache.calcite.rel.metadata.RelMetadataProvider; -import org.apache.calcite.rel.rules.AggregateJoinTransposeRule; import org.apache.calcite.rel.rules.FilterAggregateTransposeRule; import org.apache.calcite.rel.rules.FilterProjectTransposeRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; @@ -197,7 +197,8 @@ private final AtomicInteger noColsMissingStats = new AtomicInteger(0); private SemanticException semanticException; - private boolean runCBO = true; + private boolean runCBO = true; + private EnumSet profilesCBO; public CalcitePlanner(HiveConf conf) throws SemanticException { super(conf); @@ -238,6 +239,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept queryForCbo = cboCtx.nodeOfInterest; // nodeOfInterest is the query } runCBO = canCBOHandleAst(queryForCbo, getQB(), cboCtx); + profilesCBO = obtainCBOProfiles(queryProperties); if (runCBO) { disableJoinMerge = true; @@ -423,11 +425,10 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, boolean isInTest = conf.getBoolVar(ConfVars.HIVE_IN_TEST); boolean isStrictTest = isInTest && !conf.getVar(ConfVars.HIVEMAPREDMODE).equalsIgnoreCase("nonstrict"); - boolean hasEnoughJoins = !topLevelQB || (queryProperties.getJoinCount() > 1) || isInTest || distinctExprsExists(qb); - if 
(!isStrictTest && hasEnoughJoins && !queryProperties.hasClusterBy() - && !queryProperties.hasDistributeBy() && !queryProperties.hasSortBy() - && !queryProperties.hasPTF() && !queryProperties.usesScript() + if (!isStrictTest + && !queryProperties.hasClusterBy() && !queryProperties.hasDistributeBy() + && !queryProperties.hasSortBy() && !queryProperties.hasPTF() && !queryProperties.usesScript() && !queryProperties.hasMultiDestQuery() && !queryProperties.hasLateralViews()) { // Ok to run CBO. return null; @@ -438,8 +439,6 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, if (verbose) { if (isStrictTest) msg += "is in test running in mode other than nonstrict; "; - if (!hasEnoughJoins) - msg += "has too few joins; "; if (queryProperties.hasClusterBy()) msg += "has cluster by; "; if (queryProperties.hasDistributeBy()) @@ -461,6 +460,21 @@ static String canHandleQbForCbo(QueryProperties queryProperties, HiveConf conf, return msg; } + /* This method inserts the right profiles into profiles CBO depending + * on the query characteristics. */ + private static EnumSet obtainCBOProfiles(QueryProperties queryProperties) { + EnumSet profilesCBO = EnumSet.noneOf(ExtendedCBOProfile.class); + // If the query contains more than one join + if (queryProperties.getJoinCount() > 1) { + profilesCBO.add(ExtendedCBOProfile.JOIN_REORDERING); + } + // If the query contains windowing processing + if (queryProperties.hasWindowing()) { + profilesCBO.add(ExtendedCBOProfile.WINDOWING_POSTPROCESSING); + } + return profilesCBO; + } + @Override boolean continueJoinMerge() { return !runCBO; @@ -810,6 +824,11 @@ private RowResolver genRowResolver(Operator op, QB qb) { return rr; } + private enum ExtendedCBOProfile { + JOIN_REORDERING, + WINDOWING_POSTPROCESSING; + } + /** * Code responsible for Calcite plan generation and optimization. 
*/ @@ -864,61 +883,70 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu // Create MD provider HiveDefaultRelMetadataProvider mdProvider = new HiveDefaultRelMetadataProvider(conf); - // 2. Apply Pre Join Order optimizations + // 2. Apply pre-join order optimizations calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, mdProvider.getMetadataProvider()); - // 3. Appy Join Order Optimizations using Hep Planner (MST Algorithm) - List list = Lists.newArrayList(); - list.add(mdProvider.getMetadataProvider()); - RelTraitSet desiredTraits = cluster - .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY); - - HepProgram hepPgm = null; - HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) - .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); - hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, - HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); - - hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE); - hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE); - hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE); - hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE); - hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); - hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); - hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE); - if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) { - hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE); - } - - hepPgm = hepPgmBldr.build(); - HepPlanner hepPlanner = new HepPlanner(hepPgm); - - hepPlanner.registerMetadataProviders(list); - RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); - cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + // 3. 
Apply join order optimizations + calciteOptimizedPlan = calcitePreCboPlan.copy( + calcitePreCboPlan.getTraitSet(), calcitePreCboPlan.getInputs()); + + if (profilesCBO.contains(ExtendedCBOProfile.JOIN_REORDERING)) { + // 3.1. Apply join reordering MST algorithm using HepPlanner + List list = Lists.newArrayList(); + list.add(mdProvider.getMetadataProvider()); + RelTraitSet desiredTraits = cluster + .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY); + + HepProgram hepPgm = null; + HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP) + .addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class)); + hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, + HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); + + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE); + hepPgmBldr.addRuleInstance(ProjectRemoveRule.INSTANCE); + hepPgmBldr.addRuleInstance(UnionMergeRule.INSTANCE); + hepPgmBldr.addRuleInstance(new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); + hepPgmBldr.addRuleInstance(HiveAggregateProjectMergeRule.INSTANCE); + if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) { + hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE); + } + + hepPgm = hepPgmBldr.build(); + HepPlanner hepPlanner = new HepPlanner(hepPgm); + + hepPlanner.registerMetadataProviders(list); + RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list); + cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner)); + + RelNode rootRel = calcitePreCboPlan; + hepPlanner.setRoot(rootRel); + if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { + rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); + } + hepPlanner.setRoot(rootRel); + + calciteOptimizedPlan = 
hepPlanner.findBestExp(); - RelNode rootRel = calcitePreCboPlan; - hepPlanner.setRoot(rootRel); - if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) { - rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits); + // 3.2. Run rule to try to remove projects on top of join operators + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), + HepMatchOrder.BOTTOM_UP, HiveJoinCommuteRule.INSTANCE); } - hepPlanner.setRoot(rootRel); - - calciteOptimizedPlan = hepPlanner.findBestExp(); - // 4. Run rule to try to remove projects on top of join operators - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, HiveJoinCommuteRule.INSTANCE); - - // 5. Run rule to fix windowing issue when it is done over + // 4. Apply post-join order optimizations + // 4.1. Run rule to fix windowing issue when it is done over // aggregation columns (HIVE-10627) - calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), - HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE); + if (profilesCBO.contains(ExtendedCBOProfile.WINDOWING_POSTPROCESSING)) { + calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), + HepMatchOrder.BOTTOM_UP, HiveWindowingFixRule.INSTANCE); + } - // 6. Run rules to aid in translation from Calcite tree to Hive tree + // 4.2. Run rules to aid in translation from Calcite tree to Hive tree if (HiveConf.getBoolVar(conf, ConfVars.HIVE_CBO_RETPATH_HIVEOP)) { - // 6.1. Merge join into multijoin operators (if possible) + // 4.2.1. 
Merge join into multijoin operators (if possible) calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, true, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveJoinProjectTransposeRule.BOTH_PROJECT_INCLUDE_OUTER, HiveJoinProjectTransposeRule.LEFT_PROJECT_INCLUDE_OUTER, @@ -935,7 +963,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu HepMatchOrder.BOTTOM_UP, ProjectRemoveRule.INSTANCE, new ProjectMergeRule(false, HiveProject.DEFAULT_PROJECT_FACTORY)); - // 6.2. Introduce exchange operators below join/multijoin operators + // 4.2.2. Introduce exchange operators below join/multijoin operators calciteOptimizedPlan = hepPlan(calciteOptimizedPlan, false, mdProvider.getMetadataProvider(), HepMatchOrder.BOTTOM_UP, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_JOIN, HiveInsertExchange4JoinRule.EXCHANGE_BELOW_MULTIJOIN); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 4bec228..c2b45e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -8746,22 +8746,6 @@ private boolean matchExprLists(List list1, List list return distinctExprs; } - // see if there are any distinct expressions - protected static boolean distinctExprsExists(QB qb) { - QBParseInfo qbp = qb.getParseInfo(); - - TreeSet ks = new TreeSet(); - ks.addAll(qbp.getClauseNames()); - - for (String dest : ks) { - List list = qbp.getDistinctFuncExprsForClause(dest); - if (!list.isEmpty()) { - return true; - } - } - return false; - } - @SuppressWarnings("nls") private Operator genBodyPlan(QB qb, Operator input, Map aliasToOpInfo) throws SemanticException {