diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
index 802c1c1fb2..4b7139a8f7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
@@ -136,11 +136,14 @@ private boolean isReducible(final SqlKind kind) {
if (SqlKind.AVG_AGG_FUNCTIONS.contains(kind)) {
return true;
}
+ if (kind == SqlKind.SUM0) {
+ return true;
+ }
return false;
}
/**
- * Reduces all calls to AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in
+ * Reduces all calls to SUM0, AVG, STDDEV_POP, STDDEV_SAMP, VAR_POP, VAR_SAMP in
* the aggregates list to.
*
*
It handles newly generated common subexpressions since this was done
@@ -205,6 +208,9 @@ private RexNode reduceAgg(
final SqlKind kind = oldCall.getAggregation().getKind();
if (isReducible(kind)) {
switch (kind) {
+ case SUM0:
+ // replace original SUM0(x) with COALESCE(SUM(x), 0)
+ return reduceSum0(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs);
case AVG:
// replace original AVG(x) with SUM(x) / COUNT(x)
return reduceAvg(oldAggRel, oldCall, newCalls, aggCallMapping, inputExprs);
@@ -273,6 +279,50 @@ private AggregateCall createAggregateCallWithBinding(
null);
}
+  /**
+   * Rewrites {@code SUM0(x)} as {@code COALESCE(SUM(x), 0)}: adds the SUM call via
+   * {@code rexBuilder.addAggCall} and returns the expression standing in for the old
+   * call. {@code inputExprs} is not read; the parameter list mirrors reduceAvg's.
+   */
+  private RexNode reduceSum0(
+      Aggregate oldAggRel,
+      AggregateCall oldCall,
+      List<AggregateCall> newCalls,
+      Map<AggregateCall, RexNode> aggCallMapping,
+      List<RexNode> inputExprs) {
+    final int nGroups = oldAggRel.getGroupCount();
+    final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
+    final RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
+    final int iSumInput = oldCall.getArgList().get(0);
+    final RelDataType sum0InputType = typeFactory.createTypeWithNullability(
+        getFieldType(oldAggRel.getInput(), iSumInput), true);
+    final RelDataType sumReturnType = getSumReturnType(
+        rexBuilder.getTypeFactory(), sum0InputType, oldCall.getType());
+    final AggregateCall sumCall =
+        AggregateCall.create(
+            new HiveSqlSumAggFunction(
+                oldCall.isDistinct(),
+                ReturnTypes.explicit(sumReturnType),
+                oldCall.getAggregation().getOperandTypeInference(),
+                oldCall.getAggregation().getOperandTypeChecker()),
+            oldCall.isDistinct(), oldCall.isApproximate(),
+            oldCall.getArgList(),
+            oldCall.filterArg,
+            nGroups,
+            oldAggRel.getInput(),
+            null, null);
+
+    RexNode refSum = rexBuilder.addAggCall(sumCall, nGroups, oldAggRel.indicator,
+        newCalls, aggCallMapping, ImmutableList.of(sum0InputType));
+    // Align with the SUM0 call's type; the true flag keeps refSum's own nullability.
+    refSum = rexBuilder.ensureType(oldCall.getType(), refSum, true);
+
+    // A NULL sum is replaced by a zero literal of refSum's type, then cast back.
+    final RexNode coalesce = rexBuilder.makeCall(
+        SqlStdOperatorTable.COALESCE, refSum, rexBuilder.makeZeroLiteral(refSum.getType()));
+    return rexBuilder.makeCast(oldCall.getType(), coalesce);
+  }
+
private RexNode reduceAvg(
Aggregate oldAggRel,
AggregateCall oldCall,
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out
index 714e0d2a6c..3ed5609396 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out
@@ -375,7 +375,7 @@ STAGE PLANS:
outputColumnNames: _c1, a
Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: sum(_c1), $sum0(a)
+ aggregations: sum(_c1), sum(a)
mode: hash
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
@@ -383,18 +383,18 @@ STAGE PLANS:
sort order:
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: sum(VALUE._col0), $sum0(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: (UDFToDouble(_col0) / _col1) (type: double)
+ expressions: (UDFToDouble(_col0) / COALESCE(_col1,0)) (type: double)
outputColumnNames: _col0
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out
index 7c070249fb..18efcba19f 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_4.q.out
@@ -232,7 +232,7 @@ STAGE PLANS:
outputColumnNames: name, c, s
Statistics: Num rows: 4 Data size: 424 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: $sum0(c), sum(s)
+ aggregations: sum(c), sum(s)
keys: name (type: varchar(256))
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -243,24 +243,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: varchar(256))
Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: $sum0(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
keys: KEY._col0 (type: varchar(256))
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: varchar(256)), COALESCE(_col1,0) (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 212 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
@@ -437,7 +441,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: $sum0(_col0), sum(_col1)
+ aggregations: sum(_col0), sum(_col1)
keys: _col2 (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -448,24 +452,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: $sum0(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: int), COALESCE(_col1,0) (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
index d6685c863d..902a8d450f 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_7.q.out
@@ -612,7 +612,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: $sum0(_col1)
+ aggregations: sum(_col1)
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
@@ -623,7 +623,7 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Map 6
Map Operator Tree:
@@ -688,26 +688,30 @@ STAGE PLANS:
Execution mode: vectorized, llap
LLAP IO: may be used (ACID table)
Reducer 11
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: $sum0(VALUE._col0)
+ aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: $sum0(_col1)
- keys: _col0 (type: int)
- mode: hash
+ Select Operator
+ expressions: _col0 (type: int), COALESCE(_col1,0) (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
+ Group By Operator
+ aggregations: sum(_col1)
+ keys: _col0 (type: int)
+ mode: hash
+ outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col1 (type: bigint)
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: bigint)
Reducer 2
Execution mode: llap
Reduce Operator Tree:
@@ -734,7 +738,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 3
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
aggregations: count(VALUE._col0)
@@ -743,7 +747,7 @@ STAGE PLANS:
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: $sum0(_col1)
+ aggregations: sum(_col1)
keys: _col0 (type: int)
mode: hash
outputColumnNames: _col0, _col1
@@ -755,16 +759,16 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Reducer 5
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: $sum0(VALUE._col0)
+ aggregations: sum(VALUE._col0)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col0 (type: int), (_col1 + 1L) (type: bigint)
+ expressions: _col0 (type: int), (COALESCE(_col1,0) + 1L) (type: bigint)
outputColumnNames: _col0, _col1
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out
index 108471d39d..806df43721 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_no_join_opt.q.out
@@ -389,7 +389,7 @@ STAGE PLANS:
outputColumnNames: deptno, c, s
Statistics: Num rows: 5 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
- aggregations: $sum0(c), sum(s)
+ aggregations: sum(c), sum(s)
keys: deptno (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
@@ -400,24 +400,28 @@ STAGE PLANS:
Map-reduce partition columns: _col0 (type: int)
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint), _col2 (type: bigint)
- Execution mode: llap
+ Execution mode: vectorized, llap
LLAP IO: all inputs
Reducer 2
- Execution mode: llap
+ Execution mode: vectorized, llap
Reduce Operator Tree:
Group By Operator
- aggregations: $sum0(VALUE._col0), sum(VALUE._col1)
+ aggregations: sum(VALUE._col0), sum(VALUE._col1)
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
+ Select Operator
+ expressions: _col0 (type: int), COALESCE(_col1,0) (type: bigint), _col2 (type: bigint)
+ outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Fetch Operator