diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java index 17d2a055cf..3c3c28886f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregatePullUpConstantsRule.java @@ -17,11 +17,28 @@ */ package org.apache.hadoop.hive.ql.optimizer.calcite.rules; +import java.math.BigDecimal; +import java.util.ArrayList; +import java.util.List; +import java.util.NavigableMap; +import java.util.TreeMap; +import org.apache.calcite.plan.RelOptPredicateList; import org.apache.calcite.plan.RelOptRuleCall; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.core.Aggregate; import org.apache.calcite.rel.core.Aggregate.Group; +import org.apache.calcite.rel.core.AggregateCall; +import org.apache.calcite.rel.metadata.RelMetadataQuery; import org.apache.calcite.rel.rules.AggregateProjectPullUpConstantsRule; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.sql.fun.SqlStdOperatorTable; +import org.apache.calcite.tools.RelBuilder; +import org.apache.calcite.util.ImmutableBitSet; +import org.apache.calcite.util.Pair; import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories; import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate; @@ -45,4 +62,112 @@ public boolean matches(RelOptRuleCall call) { return super.matches(call); } + public void onMatch(RelOptRuleCall call) { + final Aggregate aggregate = call.rel(0); + final RelNode input = call.rel(1); + + assert !aggregate.indicator : "predicate ensured no grouping sets"; + final int groupCount = 
aggregate.getGroupCount(); + if (groupCount < 1) { + // No room for optimization since we cannot convert from non-empty + // GROUP BY list to the empty one. + return; + } + + final RexBuilder rexBuilder = aggregate.getCluster().getRexBuilder(); + final RelMetadataQuery mq = call.getMetadataQuery(); + final RelOptPredicateList predicates = + mq.getPulledUpPredicates(aggregate.getInput()); + if (predicates == null) { + return; + } + final NavigableMap<Integer, RexNode> map = new TreeMap<>(); + for (int key : aggregate.getGroupSet()) { + final RexInputRef ref = + rexBuilder.makeInputRef(aggregate.getInput(), key); + if (predicates.constantMap.containsKey(ref)) { + map.put(key, predicates.constantMap.get(ref)); + } + } + + // None of the group expressions are constant. Nothing to do. + if (map.isEmpty()) { + return; + } + + final boolean empty = groupCount == map.size(); + + ImmutableBitSet newGroupSet = aggregate.getGroupSet(); + for (int key : map.keySet()) { + newGroupSet = newGroupSet.clear(key); + } + final int newGroupCount = newGroupSet.cardinality(); + + // If the constants are on the trailing edge of the group list, we just + // reduce the group count. + final RelBuilder relBuilder = call.builder(); + relBuilder.push(input); + + // Clone aggregate calls. + final List<AggregateCall> newAggCalls = new ArrayList<>(); + for (AggregateCall aggCall : aggregate.getAggCallList()) { + newAggCalls.add( + aggCall.adaptTo(input, aggCall.getArgList(), aggCall.filterArg, + groupCount, newGroupCount)); + } + + // Create aggregate operator. 
+ if (empty) { + // If empty, create an additional count(*) field + Aggregate tmpAggregate = (Aggregate) relBuilder + .aggregate(relBuilder.groupKey(), relBuilder.countStar(null)) + .build(); + newAggCalls.add(tmpAggregate.getAggCallList().get(0)); + // Reset stack and create new aggregate call + relBuilder.push(tmpAggregate.getInput()); + relBuilder.aggregate(relBuilder.groupKey(), newAggCalls); + // Add a filter on the new count(*) != 0 + relBuilder.filter( + rexBuilder.makeCall(SqlStdOperatorTable.NOT_EQUALS, + relBuilder.field(relBuilder.peek().getRowType().getFieldCount() - 1), + rexBuilder.makeBigintLiteral(BigDecimal.ZERO))); + } else { + relBuilder.aggregate(relBuilder.groupKey(newGroupSet.toArray()), newAggCalls); + } + + // Create a projection back again. + List<Pair<RexNode, String>> projects = new ArrayList<>(); + int source = 0; + for (RelDataTypeField field : aggregate.getRowType().getFieldList()) { + RexNode expr; + final int i = field.getIndex(); + if (i >= groupCount) { + // Aggregate expressions' names and positions are unchanged. + expr = relBuilder.field(i - map.size()); + } else { + int pos = aggregate.getGroupSet().nth(i); + if (map.containsKey(pos)) { + // Re-generate the constant expression in the project. + RelDataType originalType = + aggregate.getRowType().getFieldList().get(projects.size()).getType(); + if (!originalType.equals(map.get(pos).getType())) { + expr = rexBuilder.makeCast(originalType, map.get(pos), true); + } else { + expr = map.get(pos); + } + } else { + // Project the aggregation expression, in its original + // position. 
+ expr = relBuilder.field(source); + ++source; + } + } + projects.add(Pair.of(expr, field.getName())); + } + relBuilder.project(Pair.left(projects), Pair.right(projects)); // inverse + // Create top Project fixing nullability of fields + relBuilder.convert(aggregate.getRowType(), false); + call.transformTo(relBuilder.build()); + } + } diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q index ff7cefc1f9..fc69cfd50d 100644 --- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q +++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_1.q @@ -167,3 +167,41 @@ select name from emps_n3 group by name; select name from emps_n3 group by name; drop materialized view mv1_n2; + +-- NEW 1 +create materialized view mv1_n2 as +select deptno, name, count(*) as c +from depts_n2 +group by deptno, name; + +explain +select name, count(*) as c +from depts_n2 +where name = 'Sales' +group by name; + +select name, count(*) as c +from depts_n2 +where name = 'Sales' +group by name; + +drop materialized view mv1_n2; + +-- NEW 2 +create materialized view mv1_n2 as +select deptno, name, locationid, count(*) as c +from depts_n2 +group by deptno, name, locationid; + +explain +select deptno, name, count(*) as c +from depts_n2 +where name = 'Sales' +group by deptno, name; + +select deptno, name, count(*) as c +from depts_n2 +where name = 'Sales' +group by deptno, name; + +drop materialized view mv1_n2; diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out index 5714ef80ca..6e4e0ec87b 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_1.q.out @@ -1016,3 +1016,253 @@ POSTHOOK: query: drop materialized view mv1_n2 POSTHOOK: type: DROP_MATERIALIZED_VIEW POSTHOOK: Input: 
default@mv1_n2 POSTHOOK: Output: default@mv1_n2 +PREHOOK: query: create materialized view mv1_n2 as +select deptno, name, count(*) as c +from depts_n2 +group by deptno, name +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts_n2 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n2 +POSTHOOK: query: create materialized view mv1_n2 as +select deptno, name, count(*) as c +from depts_n2 +group by deptno, name +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n2 +PREHOOK: query: explain +select name, count(*) as c +from depts_n2 +where name = 'Sales' +group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain +select name, count(*) as c +from depts_n2 +where name = 'Sales' +group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_n2 + filterExpr: (CAST( name AS STRING) = 'Sales') (type: boolean) + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (CAST( name AS STRING) = 'Sales') (type: boolean) + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + keys: true (type: boolean) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 
Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: boolean) + sort order: + + Map-reduce partition columns: _col0 (type: boolean) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: boolean) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'Sales' (type: varchar(256)), COALESCE(_col1,0) (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 97 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select name, count(*) as c +from depts_n2 +where name = 'Sales' +group by name +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +POSTHOOK: query: select name, count(*) as c +from depts_n2 +where name = 'Sales' +group by name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +Sales 1 +PREHOOK: query: drop materialized view mv1_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n2 +PREHOOK: Output: default@mv1_n2 +POSTHOOK: query: drop materialized view mv1_n2 +POSTHOOK: type: 
DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n2 +POSTHOOK: Output: default@mv1_n2 +PREHOOK: query: create materialized view mv1_n2 as +select deptno, name, locationid, count(*) as c +from depts_n2 +group by deptno, name, locationid +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@depts_n2 +PREHOOK: Output: database:default +PREHOOK: Output: default@mv1_n2 +POSTHOOK: query: create materialized view mv1_n2 as +select deptno, name, locationid, count(*) as c +from depts_n2 +group by deptno, name, locationid +POSTHOOK: type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv1_n2 +PREHOOK: query: explain +select deptno, name, count(*) as c +from depts_n2 +where name = 'Sales' +group by deptno, name +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +POSTHOOK: query: explain +select deptno, name, count(*) as c +from depts_n2 +where name = 'Sales' +group by deptno, name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv1_n2 + filterExpr: (CAST( name AS STRING) = 'Sales') (type: boolean) + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (CAST( name AS STRING) = 'Sales') (type: boolean) + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: deptno (type: int), c (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), 'Sales' (type: varchar(256)), COALESCE(_col1,0) (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select deptno, name, count(*) as c +from depts_n2 +where name = 'Sales' +group by deptno, name +PREHOOK: type: QUERY +PREHOOK: Input: default@depts_n2 +PREHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +POSTHOOK: query: select deptno, name, count(*) as c +from depts_n2 +where name = 'Sales' +group by deptno, name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@depts_n2 +POSTHOOK: Input: default@mv1_n2 +#### A masked pattern was here #### +10 Sales 1 +PREHOOK: query: drop 
materialized view mv1_n2 +PREHOOK: type: DROP_MATERIALIZED_VIEW +PREHOOK: Input: default@mv1_n2 +PREHOOK: Output: default@mv1_n2 +POSTHOOK: query: drop materialized view mv1_n2 +POSTHOOK: type: DROP_MATERIALIZED_VIEW +POSTHOOK: Input: default@mv1_n2 +POSTHOOK: Output: default@mv1_n2