diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java index 802c1c1fb2..4b7139a8f7 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java @@ -136,11 +136,14 @@ private boolean isReducible(final SqlKind kind) { if (SqlKind.AVG_AGG_FUNCTIONS.contains(kind)) { return true; } + if (kind == SqlKind.SUM0) { + return true; + } return false; } /** - * Reduces all calls to AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in + * Reduces all calls to SUM0, AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in * the aggregates list to. * *

It handles newly generated common subexpressions since this was done @@ -205,6 +208,9 @@ private RexNode reduceAgg( final SqlKind kind = oldCall.getAggregation().getKind(); if (isReducible(kind)) { switch (kind) { + case SUM0: + // replace original SUM0(x) with COALESCE(SUM(x), 0) + return reduceSum0(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs); case AVG: // replace original AVG(x) with SUM(x) / COUNT(x) return reduceAvg(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs); @@ -273,6 +279,50 @@ private AggregateCall createAggregateCallWithBinding( null); } + private RexNode reduceSum0( + Aggregate oldAggRel, + AggregateCall oldCall, + List newCalls, + Map aggCallMapping, + List inputExprs) { + final int nGroups = oldAggRel.getGroupCount(); + final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder(); + final RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory(); + final int iAvgInput = oldCall.getArgList().get(0); + final RelDataType sum0InputType = typeFactory.createTypeWithNullability( + getFieldType(oldAggRel.getInput(), iAvgInput), true); + final RelDataType sumReturnType = getSumReturnType( + rexBuilder.getTypeFactory(), sum0InputType, oldCall.getType()); + final AggregateCall sumCall = + AggregateCall.create( + new HiveSqlSumAggFunction( + oldCall.isDistinct(), + ReturnTypes.explicit(sumReturnType), + oldCall.getAggregation().getOperandTypeInference(), + oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM, + oldCall.isDistinct(), + oldCall.isApproximate(), + oldCall.getArgList(), + oldCall.filterArg, + oldAggRel.getGroupCount(), + oldAggRel.getInput(), + null, + null); + + RexNode refSum = + rexBuilder.addAggCall(sumCall, + nGroups, + oldAggRel.indicator, + newCalls, + aggCallMapping, + ImmutableList.of(sum0InputType)); + refSum = rexBuilder.ensureType(oldCall.getType(), refSum, true); + + final RexNode coalesce = rexBuilder.makeCall( + SqlStdOperatorTable.COALESCE, refSum, rexBuilder.makeZeroLiteral(refSum.getType())); + return rexBuilder.makeCast(oldCall.getType(), coalesce); + } + private RexNode reduceAvg( Aggregate oldAggRel, AggregateCall oldCall, diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out index 714e0d2a6c..3ed5609396 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out @@ -375,7 +375,7 @@ STAGE PLANS: outputColumnNames: _c1, a Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_c1), $sum0(a) + aggregations: sum(_c1), sum(a) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE @@ -383,18 +383,18 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), $sum0(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: (UDFToDouble(_col0) / _col1) (type: double) + expressions: (UDFToDouble(_col0) / COALESCE(_col1,0)) (type: double) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out index 7c070249fb..18efcba19f 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out @@ -232,7 +232,7 @@ STAGE PLANS: outputColumnNames: name, c, s Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: $sum0(c), sum(s) + aggregations: sum(c), sum(s) keys: name (type: varchar(256)) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -243,24 +243,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: varchar(256)) Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: $sum0(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: varchar(256)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: varchar(256)), COALESCE(_col1,0) (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -437,7 +441,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: $sum0(_col0), sum(_col1) + aggregations: sum(_col0), sum(_col1) keys: _col2 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -448,24 +452,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: $sum0(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), COALESCE(_col1,0) (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out index d6685c863d..902a8d450f 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out @@ -612,7 +612,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: $sum0(_col1) + aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -623,7 +623,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Map 6 Map Operator Tree: @@ -688,26 +688,30 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 11 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: $sum0(VALUE._col0) + aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: $sum0(_col1) - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: _col0 (type: int), COALESCE(_col1,0) (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + aggregations: sum(_col1) + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -734,7 +738,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -743,7 +747,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: $sum0(_col1) + aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 @@ -755,16 +759,16 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 5 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: $sum0(VALUE._col0) + aggregations: sum(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), (_col1 + 1L) (type: bigint) + expressions: _col0 (type: int), (COALESCE(_col1,0) + 1L) (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out index 108471d39d..806df43721 100644 --- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out +++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out @@ -389,7 +389,7 @@ STAGE PLANS: outputColumnNames: deptno, c, s Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: $sum0(c), sum(s) + aggregations: sum(c), sum(s) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 @@ -400,24 +400,28 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: $sum0(VALUE._col0), sum(VALUE._col1) + aggregations: sum(VALUE._col0), sum(VALUE._col1) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), COALESCE(_col1,0) (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator