diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index de6a053..0197409 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -153,8 +153,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinPushTransitivePredicatesRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveJoinToMultiJoinRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule; -import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePartitionPruneRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePointLookupOptimizerRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HivePreFilteringRule; @@ -164,10 +162,12 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRulesRegistry; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortJoinReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortLimitPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortMergeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortProjectTransposeRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortRemoveRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveSortUnionReduceRule; +import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveUnionPullUpConstantsRule; import org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveWindowingFixRule; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ASTConverter; import org.apache.hadoop.hive.ql.optimizer.calcite.translator.HiveOpConverter; @@ -956,8 +956,10 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu Executor executorProvider = new HiveRexExecutorImpl(cluster); // 2. Apply pre-join order optimizations + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); calcitePreCboPlan = applyPreJoinOrderingTransforms(calciteGenPlan, mdProvider.getMetadataProvider(), executorProvider); + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, "Calcite: Prejoin ordering transformation"); // 3. Apply join order optimizations: reordering MST algorithm // If join optimizations failed because of missing stats, we continue with @@ -1126,7 +1128,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv // We need a cost model for MR to enable this on MR. basePlan = hepPlan(basePlan, true, mdProvider, null, HiveExpandDistinctAggregatesRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Distinct aggregate rewrite"); + "Calcite: Prejoin ordering transformation - Distinct aggregate rewrite"); } // 2. Try factoring out common filter elements & separating deterministic @@ -1138,7 +1140,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, false, mdProvider, null, HepMatchOrder.ARBITRARY, HivePreFilteringRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, factor out common filter elements and separating deterministic vs non-deterministic UDF"); + "Calcite: Prejoin ordering transformation - Factor out common filter elements " + + "and separating deterministic vs non-deterministic UDF"); // 3. Run exhaustive PPD, add not null filters, transitive inference, // constant propagation, constant folding @@ -1172,7 +1175,8 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, executorProvider, HepMatchOrder.BOTTOM_UP, rules.toArray(new RelOptRule[rules.size()])); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, PPD, not null predicates, transitive inference, constant folding"); + "Calcite: Prejoin ordering transformation - PPD, not null predicates, transitive inference, " + + "constant folding"); // 4. Push down limit through outer join // NOTE: We run this after PPD to support old style join syntax. @@ -1193,7 +1197,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv new HiveSortRemoveRule(reductionProportion, reductionTuples), HiveProjectSortTransposeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Push down limit through outer join"); + "Calcite: Prejoin ordering transformation - Push down limit through outer join"); } // 5. Push Down Semi Joins @@ -1201,13 +1205,13 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv basePlan = hepPlan(basePlan, true, mdProvider, null, SemiJoinJoinTransposeRule.INSTANCE, SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Push Down Semi Joins"); + "Calcite: Prejoin ordering transformation - Push Down Semi Joins"); // 6. Apply Partition Pruning perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, null, new HivePartitionPruneRule(conf)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Partition Pruning"); + "Calcite: Prejoin ordering transformation - Partition Pruning"); // 7. Projection Pruning (this introduces select above TS & hence needs to be run last due to PP) perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); @@ -1215,14 +1219,14 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv HiveRelFactories.HIVE_BUILDER.create(cluster, null)); basePlan = fieldTrimmer.trim(basePlan); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Projection Pruning"); + "Calcite: Prejoin ordering transformation - Projection Pruning"); // 8. Merge Project-Project if possible perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); basePlan = hepPlan(basePlan, false, mdProvider, null, new ProjectMergeRule(true, HiveRelFactories.HIVE_PROJECT_FACTORY)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Merge Project-Project"); + "Calcite: Prejoin ordering transformation - Merge Project-Project"); // 9. Rerun PPD through Project as column pruning would have introduced // DT above scans; By pushing filter just above TS, Hive can push it into @@ -1233,7 +1237,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv Filter.class, HiveRelFactories.HIVE_FILTER_FACTORY, HiveProject.class, HiveRelFactories.HIVE_PROJECT_FACTORY, HiveTableScan.class)); perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, - "Calcite: Prejoin ordering transformation, Rerun PPD"); + "Calcite: Prejoin ordering transformation - Rerun PPD"); return basePlan; } @@ -3060,6 +3064,8 @@ private RelNode genLogicalPlan(QBExpr qbexpr) throws SemanticException { } private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticException { + PerfLogger perfLogger = SessionState.getPerfLogger(); + RelNode srcRel = null; RelNode filterRel = null; RelNode gbRel = null; @@ -3084,6 +3090,9 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept // 1. Build Rel For Src (SubQuery, TS, Join) // 1.1. Recurse over the subqueries to fill the subquery part of the plan + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } for (String subqAlias : qb.getSubqAliases()) { QBExpr qbexpr = qb.getSubqForAlias(subqAlias); RelNode relNode = genLogicalPlan(qbexpr); @@ -3100,12 +3109,23 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept } } } + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for subqueries"); + } // 1.2 Recurse over all the source tables + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } for (String tableAlias : qb.getTabAliases()) { RelNode op = genTableLogicalPlan(tableAlias, qb); aliasToRel.put(tableAlias, op); } + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for table scan"); + } if (aliasToRel.isEmpty()) { // // This may happen for queries like select 1; (no source table) @@ -3123,40 +3143,90 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept throw new CalciteSemanticException("Unsupported", UnsupportedFeature.Others); } + // 1.3 process join + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } if (qb.getParseInfo().getJoinExpr() != null) { srcRel = genJoinLogicalPlan(qb.getParseInfo().getJoinExpr(), aliasToRel); } else { // If no join then there should only be either 1 TS or 1 SubQuery srcRel = aliasToRel.values().iterator().next(); } + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for join"); + } // 2. Build Rel for where Clause + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } filterRel = genFilterLogicalPlan(qb, srcRel, aliasToRel, false); srcRel = (filterRel == null) ? srcRel : filterRel; RelNode starSrcRel = srcRel; + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for filter"); + } // 3. Build Rel for GB Clause + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } gbRel = genGBLogicalPlan(qb, srcRel); srcRel = (gbRel == null) ? srcRel : gbRel; + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for group by"); + } // 4. Build Rel for GB Having Clause + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } gbHavingRel = genGBHavingLogicalPlan(qb, srcRel, aliasToRel); srcRel = (gbHavingRel == null) ? srcRel : gbHavingRel; + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for having"); + } // 5. Build Rel for Select Clause + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } selectRel = genSelectLogicalPlan(qb, srcRel, starSrcRel); srcRel = (selectRel == null) ? srcRel : selectRel; + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for select"); + } // 6. Build Rel for OB Clause + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } Pair obTopProjPair = genOBLogicalPlan(qb, srcRel, outerMostQB); obRel = obTopProjPair.getKey(); RelNode topConstrainingProjArgsRel = obTopProjPair.getValue(); srcRel = (obRel == null) ? srcRel : obRel; + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for order by"); + } // 7. Build Rel for Limit Clause + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } limitRel = genLimitLogicalPlan(qb, srcRel); srcRel = (limitRel == null) ? srcRel : limitRel; + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Build RelNode for limit"); + } // 8. Introduce top constraining select if needed. // NOTES: @@ -3173,6 +3243,9 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB) throws SemanticExcept // in the PlanModifierForASTConv we would modify the top level OB to // migrate exprs from input sel to SortRel (Note that Calcite doesn't // support this; but since we are done with Calcite at this point its OK). + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } if (topConstrainingProjArgsRel != null) { List originalInputRefs = Lists.transform(topConstrainingProjArgsRel.getRowType() .getFieldList(), new Function() { @@ -3188,10 +3261,17 @@ public RexNode apply(RelDataTypeField input) { } srcRel = genSelectRelNode(originalInputRefs, topConstrainingProjRR, srcRel); } + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Introduce top constraining project"); + } // 9. Incase this QB corresponds to subquery then modify its RR to point // to subquery alias // TODO: cleanup this + if (outerMostQB) { + perfLogger.PerfLogBegin(this.getClass().getName(), PerfLogger.OPTIMIZER); + } if (qb.getParseInfo().getAlias() != null) { RowResolver rr = this.relToHiveRR.get(srcRel); RowResolver newRR = new RowResolver(); @@ -3210,6 +3290,10 @@ public RexNode apply(RelDataTypeField input) { relToHiveRR.put(srcRel, newRR); relToHiveColNameCalcitePosMap.put(srcRel, buildHiveToCalciteColumnMap(newRR, srcRel)); } + if (outerMostQB) { + perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.OPTIMIZER, + "Calcite: Plan generation - Modify RR to point to subquery alias"); + } if (LOG.isDebugEnabled()) { LOG.debug("Created Plan for Query Block " + qb.getId());