diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
index c59af39..8646bad 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateJoinTransposeRule.java
@@ -74,6 +74,12 @@
   private final boolean allowFunctions;
 
+  @Override
+  public boolean matches(RelOptRuleCall call) {
+    return !call.getPlanner().getContext().unwrap(HiveRulesRegistry.class).
+        getVisited(this).contains(call.rel(1));
+  }
+
   /** Creates an AggregateJoinTransposeRule that may push down functions. */
   private HiveAggregateJoinTransposeRule(Class<? extends Aggregate> aggregateClass,
       RelFactories.AggregateFactory aggregateFactory,
@@ -92,6 +98,7 @@ private HiveAggregateJoinTransposeRule(Class<? extends Aggregate> aggregateClass
   public void onMatch(RelOptRuleCall call) {
     final Aggregate aggregate = call.rel(0);
     final Join join = call.rel(1);
+    call.getPlanner().getContext().unwrap(HiveRulesRegistry.class).registerVisited(this, join);
     final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder();
 
     // If any aggregate functions do not support splitting, bail out
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index c005b1a..3e201d5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -879,26 +879,26 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
       list.add(mdProvider.getMetadataProvider());
       RelTraitSet desiredTraits = cluster
           .traitSetOf(HiveRelNode.CONVENTION, RelCollations.EMPTY);
-
+
       HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
       hepPgmBldr.addRuleInstance(new JoinToMultiJoinRule(HiveJoin.class));
       hepPgmBldr.addRuleInstance(new LoptOptimizeJoinRule(HiveJoin.HIVE_JOIN_FACTORY,
          HiveProject.DEFAULT_PROJECT_FACTORY, HiveFilter.DEFAULT_FILTER_FACTORY));
-
+
       HepProgram hepPgm = hepPgmBldr.build();
       HepPlanner hepPlanner = new HepPlanner(hepPgm);
-
+
       hepPlanner.registerMetadataProviders(list);
       RelMetadataProvider chainedProvider = ChainedRelMetadataProvider.of(list);
       cluster.setMetadataProvider(new CachingRelMetadataProvider(chainedProvider, hepPlanner));
-
+
       RelNode rootRel = calcitePreCboPlan;
       hepPlanner.setRoot(rootRel);
       if (!calcitePreCboPlan.getTraitSet().equals(desiredTraits)) {
         rootRel = hepPlanner.changeTraits(calcitePreCboPlan, desiredTraits);
       }
       hepPlanner.setRoot(rootRel);
-
+
       calciteOptimizedPlan = hepPlanner.findBestExp();
     } catch (Exception e) {
       boolean isMissingStats = noColsMissingStats.get() > 0;
@@ -929,11 +929,13 @@
       // the rest of optimizations
       if (conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)) {
         try {
+          HiveRulesRegistry registry = new HiveRulesRegistry();
           HepProgramBuilder hepPgmBldr = new HepProgramBuilder().addMatchOrder(HepMatchOrder.BOTTOM_UP);
           hepPgmBldr.addRuleInstance(HiveAggregateJoinTransposeRule.INSTANCE);
           HepProgram hepPgm = hepPgmBldr.build();
-          HepPlanner hepPlanner = new HepPlanner(hepPgm);
+          HiveHepPlannerContext context = new HiveHepPlannerContext(registry);
+          HepPlanner hepPlanner = new HepPlanner(hepPgm, context);
           List<RelMetadataProvider> list = Lists.newArrayList();
           list.add(mdProvider.getMetadataProvider());
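Note: the two Java hunks above cooperate through a registry that this patch references but does not define. matches() declines to fire on any Join the rule has already processed, and onMatch() records each Join it sees, so the bottom-up HepPlanner cannot loop by re-applying the transpose to its own output. A minimal sketch of the registry/context pair, consistent with the call sites above but hypothetical in its details (the real HiveRulesRegistry and HiveHepPlannerContext in Hive may differ):

    import java.util.Set;

    import org.apache.calcite.plan.Context;
    import org.apache.calcite.plan.RelOptRule;
    import org.apache.calcite.rel.RelNode;

    import com.google.common.collect.HashMultimap;
    import com.google.common.collect.SetMultimap;

    /** Remembers which RelNodes each rule has already fired on. */
    class HiveRulesRegistry {
      private final SetMultimap<RelOptRule, RelNode> visited = HashMultimap.create();

      public void registerVisited(RelOptRule rule, RelNode node) {
        visited.put(rule, node);
      }

      /** Never returns null; an empty set means the rule has not fired yet. */
      public Set<RelNode> getVisited(RelOptRule rule) {
        return visited.get(rule);
      }
    }

    /** Planner context that hands the registry to rules via unwrap(). */
    class HiveHepPlannerContext implements Context {
      private final HiveRulesRegistry registry;

      HiveHepPlannerContext(HiveRulesRegistry registry) {
        this.registry = registry;
      }

      @Override
      public <T> T unwrap(Class<T> clazz) {
        return clazz.isInstance(registry) ? clazz.cast(registry) : null;
      }
    }

Passing the context to new HepPlanner(hepPgm, context), as the CalcitePlanner hunk does, is what makes the registry reachable from inside the rule through call.getPlanner().getContext().unwrap(HiveRulesRegistry.class).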
diff --git a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
index 096ae10..27b6ff7 100644
--- a/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
+++ b/ql/src/test/queries/clientpositive/cbo_rp_auto_join1.q
@@ -3,7 +3,7 @@ set hive.stats.fetch.column.stats=true;
 set hive.enforce.bucketing = true;
 set hive.enforce.sorting = true;
 set hive.exec.reducers.max = 1;
-
+set hive.transpose.aggr.join=true;
 -- SORT_QUERY_RESULTS
 
 CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS;
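The new set line enables the rewrite for this test: hive.transpose.aggr.join is the property behind ConfVars.AGGR_JOIN_TRANSPOSE, which the CalcitePlanner hunk checks before adding HiveAggregateJoinTransposeRule. A hypothetical snippet showing the same toggle from Java (only the constant shown in the diff is assumed; everything else is standard HiveConf API):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class EnableAggrJoinTranspose {
      public static void main(String[] args) {
        // Equivalent to `set hive.transpose.aggr.join=true;` in the .q file.
        HiveConf conf = new HiveConf();
        conf.setBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE, true);
        System.out.println(conf.getBoolVar(ConfVars.AGGR_JOIN_TRANSPOSE)); // prints: true
      }
    }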
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
index 6537a8a..59a2f12 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join1.q.out
@@ -933,8 +933,10 @@ select count(*) from
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-2 depends on stages: Stage-1, Stage-4
+  Stage-3 depends on stages: Stage-2
+  Stage-4 is a root stage
+  Stage-0 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -947,41 +949,67 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
            alias: subq1:a
            Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Filter Operator
              predicate: (key + 1) is not null (type: boolean)
              Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
-                expressions: key (type: int)
+                expressions: (key + 1) (type: int)
                outputColumnNames: key
                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: (key + 1) (type: int)
-                  sort order: +
-                  Map-reduce partition columns: (key + 1) (type: int)
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: key (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: key, $f1
+          Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
          TableScan
-            alias: subq2:a
-            Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
-            Filter Operator
-              predicate: (key + 1) is not null (type: boolean)
-              Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
-              Select Operator
-                expressions: key (type: int)
-                outputColumnNames: key
-                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
-                Reduce Output Operator
-                  key expressions: (key + 1) (type: int)
-                  sort order: +
-                  Map-reduce partition columns: (key + 1) (type: int)
-                  Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+            Reduce Output Operator
+              key expressions: key (type: int)
+              sort order: +
+              Map-reduce partition columns: key (type: int)
+              Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: $f1 (type: bigint)
+          TableScan
+            Reduce Output Operator
+              key expressions: key (type: int)
+              sort order: +
+              Map-reduce partition columns: key (type: int)
+              Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: $f1 (type: bigint)
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          keys:
-            0 (key + 1) (type: int)
-            1 (key + 1) (type: int)
-          Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            0 key (type: int)
+            1 key (type: int)
+          outputColumnNames: $f1, $f10
+          Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
-            Statistics: Num rows: 5 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            expressions: ($f1 * $f10) (type: bigint)
+            outputColumnNames: $f4
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
            Group By Operator
-              aggregations: count()
+              aggregations: $sum0($f4)
              mode: hash
              outputColumnNames: _col0
              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -992,7 +1020,7 @@ STAGE PLANS:
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
-  Stage: Stage-2
+  Stage: Stage-3
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -1002,7 +1030,7 @@ STAGE PLANS:
            value expressions: _col0 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
-          aggregations: count(VALUE._col0)
+          aggregations: $sum0(VALUE._col0)
          mode: mergepartial
          outputColumnNames: $f0
          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1014,6 +1042,45 @@ STAGE PLANS:
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: subq2:a
+            Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: (key + 1) is not null (type: boolean)
+              Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: (key + 1) (type: int)
+                outputColumnNames: key
+                Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: key (type: int)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: int)
+                    Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: int)
+          mode: mergepartial
+          outputColumnNames: key, $f1
+          Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
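The reshaped plan is why the query result is unchanged: instead of joining the two 5-row sides on (key + 1) and counting the matches, Stage-1 and Stage-4 first count rows per key on each side (5 rows shrink to 2 in the plan's statistics), Stage-2 joins the grouped outputs and multiplies the per-key counts, and Stage-3 totals the products with $sum0, a SUM variant that yields 0 instead of NULL on empty input, so an empty join still produces count 0. A self-contained illustration of that identity, using made-up sample values rather than the test tables' actual rows:

    import java.util.HashMap;
    import java.util.Map;

    public class AggregateJoinTransposeIdentity {
      public static void main(String[] args) {
        // Hypothetical (key + 1) values standing in for the two join inputs.
        int[] left  = {2, 3, 3, 4};
        int[] right = {2, 3, 4, 4};

        // Old plan: join first, then count the matching pairs.
        long joinThenCount = 0;
        for (int l : left) {
          for (int r : right) {
            if (l == r) {
              joinThenCount++;
            }
          }
        }

        // New plan: pre-aggregate each side (Stage-1 and Stage-4), join the
        // smaller grouped outputs, multiply per-key counts (Stage-2), and
        // total the products with $sum0 semantics (Stage-2/Stage-3).
        Map<Integer, Long> leftCnt = countByKey(left);
        Map<Integer, Long> rightCnt = countByKey(right);
        long sum0 = 0;
        for (Map.Entry<Integer, Long> e : leftCnt.entrySet()) {
          Long rc = rightCnt.get(e.getKey());
          if (rc != null) {
            sum0 += e.getValue() * rc;
          }
        }

        // Both strategies agree: 1*1 + 2*1 + 1*2 = 5.
        System.out.println(joinThenCount + " == " + sum0);
      }

      private static Map<Integer, Long> countByKey(int[] keys) {
        Map<Integer, Long> counts = new HashMap<>();
        for (int k : keys) {
          counts.merge(k, 1L, Long::sum);
        }
        return counts;
      }
    }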