diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 5d72e15..fdb51a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -66,6 +66,7 @@ import org.apache.calcite.rel.rules.JoinPushTransitivePredicatesRule; import org.apache.calcite.rel.rules.JoinToMultiJoinRule; import org.apache.calcite.rel.rules.LoptOptimizeJoinRule; +import org.apache.calcite.rel.rules.ReduceExpressionsRule; import org.apache.calcite.rel.rules.SemiJoinFilterTransposeRule; import org.apache.calcite.rel.rules.SemiJoinJoinTransposeRule; import org.apache.calcite.rel.rules.SemiJoinProjectTransposeRule; @@ -164,7 +165,7 @@ import com.google.common.collect.ImmutableList.Builder; public class CalcitePlanner extends SemanticAnalyzer { - private AtomicInteger noColsMissingStats = new AtomicInteger(0); + private final AtomicInteger noColsMissingStats = new AtomicInteger(0); private List topLevelFieldSchema; private SemanticException semanticException; private boolean runCBO = true; @@ -187,6 +188,7 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { } } + @Override @SuppressWarnings("rawtypes") Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException { Operator sinkOp = null; @@ -282,7 +284,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept /** * Can CBO handle the given AST? - * + * * @param ast * Top level AST * @param qb @@ -290,7 +292,7 @@ Operator genOPTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticExcept * @param cboCtx * @param semAnalyzer * @return boolean - * + * * Assumption:
* If top level QB is query then everything below it must also be * Query. @@ -345,7 +347,7 @@ boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { /** * Checks whether Calcite can handle the query. - * + * * @param queryProperties * @param conf * @param topLevelQB @@ -354,7 +356,7 @@ boolean canHandleAstForCbo(ASTNode ast, QB qb, PreCboCtx cboCtx) { * Whether return value should be verbose in case of failure. * @return null if the query can be handled; non-null reason string if it * cannot be. - * + * * Assumption:
* 1. If top level QB is query then everything below it must also be * Query
@@ -548,7 +550,7 @@ private static void replaceASTChild(ASTNode child, ASTNode newChild) { /** * Get Optimized AST for the given QB tree in the semAnalyzer. - * + * * @return Optimized operator tree translated in to Hive AST * @throws SemanticException */ @@ -572,7 +574,7 @@ ASTNode getOptimizedAST() throws SemanticException { /*** * Unwraps Calcite Invocation exceptions coming meta data provider chain and * obtains the real cause. - * + * * @param Exception */ private void rethrowCalciteException(Exception e) throws SemanticException { @@ -651,7 +653,7 @@ private boolean isUselessCause(Throwable t) { private class CalcitePlannerAction implements Frameworks.PlannerAction { private RelOptCluster cluster; private RelOptSchema relOptSchema; - private Map partitionCache; + private final Map partitionCache; // TODO: Do we need to keep track of RR, ColNameToPosMap for every op or // just last one. @@ -706,6 +708,10 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY, HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY)); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.JOIN_INSTANCE); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.FILTER_INSTANCE); + hepPgmBldr.addRuleInstance(ReduceExpressionsRule.PROJECT_INSTANCE); + hepPgm = hepPgmBldr.build(); HepPlanner hepPlanner = new HepPlanner(hepPgm); @@ -736,7 +742,7 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu /** * Perform all optimizations before Join Ordering. - * + * * @param basePlan * original plan * @param mdProvider @@ -754,7 +760,11 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv SemiJoinFilterTransposeRule.INSTANCE, SemiJoinProjectTransposeRule.INSTANCE); // 2. PPD - basePlan = hepPlan(basePlan, true, mdProvider, new HiveFilterProjectTransposeRule( + basePlan = hepPlan(basePlan, true, mdProvider, + ReduceExpressionsRule.PROJECT_INSTANCE, + ReduceExpressionsRule.FILTER_INSTANCE, + ReduceExpressionsRule.JOIN_INSTANCE, + new HiveFilterProjectTransposeRule( Filter.class, HiveFilter.DEFAULT_FILTER_FACTORY, HiveProject.class, HiveProject.DEFAULT_PROJECT_FACTORY), new HiveFilterSetOpTransposeRule( HiveFilter.DEFAULT_FILTER_FACTORY), @@ -787,7 +797,7 @@ private RelNode applyPreJoinOrderingTransforms(RelNode basePlan, RelMetadataProv /** * Run the HEP Planner with the given rule set. - * + * * @param basePlan * @param followPlanChanges * @param mdProvider @@ -1057,7 +1067,7 @@ private RelNode genJoinRelNode(RelNode leftRel, RelNode rightRel, JoinType hiveJ /** * Generate Join Logical Plan Relnode by walking through the join AST. - * + * * @param qb * @param aliasToRel * Alias(Table/Relation alias) to RelNode; only read and not @@ -1276,7 +1286,7 @@ private RelNode genFilterRelNode(QB qb, ASTNode searchCond, RelNode srcRel, Map aliasToRel, boolean forHavingClause) throws SemanticException { /* * Handle Subquery predicates. - * + * * Notes (8/22/14 hb): Why is this a copy of the code from {@link * #genFilterPlan} - for now we will support the same behavior as non CBO * route. - but plan to allow nested SubQueries(Restriction.9.m) and @@ -1676,7 +1686,7 @@ private AggInfo getHiveAggInfo(ASTNode aggAst, int aggFnLstArgIndx, RowResolver /** * Generate GB plan. - * + * * @param qb * @param srcRel * @return TODO: 1. Grouping Sets (roll up..) @@ -1788,7 +1798,7 @@ private RelNode genGBLogicalPlan(QB qb, RelNode srcRel) throws SemanticException * Generate OB RelNode and input Select RelNode that should be used to * introduce top constraining Project. If Input select RelNode is not * present then don't introduce top constraining select. - * + * * @param qb * @param srcRel * @param outermostOB @@ -2198,7 +2208,7 @@ private RelNode genSelectRelNode(List calciteColLst, RowResolver out_rw /** * NOTE: there can only be one select caluse since we don't handle multi * destination insert. - * + * * @throws SemanticException */ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel) diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 81f174e..556e289 100644 --- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -526,13 +526,13 @@ STAGE DEPENDENCIES: Stage-17 has a backup stage: Stage-2 Stage-13 depends on stages: Stage-17 Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-16 + Stage-12 depends on stages: Stage-15 Stage-0 depends on stages: Stage-12 Stage-7 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-2, Stage-13 + Stage-4 depends on stages: Stage-16 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 Stage-2 STAGE PLANS: @@ -628,6 +628,70 @@ STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: + sq_1:a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + sq_1:a + TableScan + alias: a + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + + Stage: Stage-12 + Map Reduce + Map Operator Tree: + TableScan + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + Local Work: + Map Reduce Local Work + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-7 + Stats-Aggr Operator + + Stage: Stage-16 + Map Reduce Local Work + Alias -> Map Local Tables: sq_2:s1 Fetch Operator limit: -1 @@ -701,70 +765,6 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-16 - Map Reduce Local Work - Alias -> Map Local Tables: - sq_1:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - sq_1:a - TableScan - alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key > '9') and value is not null) (type: boolean) - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: string), value (type: string) - 1 _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - Local Work: - Map Reduce Local Work - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_4 - - Stage: Stage-7 - Stats-Aggr Operator - Stage: Stage-2 Map Reduce Map Operator Tree: @@ -845,12 +845,12 @@ RUN: Stage-17:MAPREDLOCAL RUN: Stage-13:MAPRED RUN: Stage-15:MAPREDLOCAL RUN: Stage-16:MAPREDLOCAL -RUN: Stage-4:MAPRED RUN: Stage-12:MAPRED -RUN: Stage-1:MOVE +RUN: Stage-4:MAPRED RUN: Stage-0:MOVE -RUN: Stage-5:STATS +RUN: Stage-1:MOVE RUN: Stage-7:STATS +RUN: Stage-5:STATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4