diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
index fb65ce180c..0dea892b42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveAggregateReduceFunctionsRule.java
@@ -34,6 +34,7 @@
 import org.apache.calcite.sql.SqlAggFunction;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.calcite.sql.type.InferTypes;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlTypeName;
 import org.apache.calcite.sql.type.SqlTypeUtil;
@@ -41,10 +42,12 @@
 import org.apache.calcite.util.CompositeList;
 import org.apache.calcite.util.ImmutableIntList;
 import org.apache.calcite.util.Util;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
 import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate;
+import org.apache.hadoop.hive.ql.optimizer.calcite.translator.TypeConverter;
 
 import java.math.BigDecimal;
 import java.util.ArrayList;
@@ -280,13 +283,15 @@ private RexNode reduceAvg(
     final RexBuilder rexBuilder = oldAggRel.getCluster().getRexBuilder();
     final RelDataTypeFactory typeFactory = oldAggRel.getCluster().getTypeFactory();
     final int iAvgInput = oldCall.getArgList().get(0);
-    RelDataType avgInputType = typeFactory.createTypeWithNullability(
+    final RelDataType avgInputType = typeFactory.createTypeWithNullability(
         getFieldType(oldAggRel.getInput(), iAvgInput), true);
+    final RelDataType sumRetType = getSumReturnType(
+        rexBuilder.getTypeFactory(), avgInputType);
     final AggregateCall sumCall =
         AggregateCall.create(
             new HiveSqlSumAggFunction(
                 oldCall.isDistinct(),
-                oldCall.getAggregation().getReturnTypeInference(),
+                ReturnTypes.explicit(sumRetType),
                 oldCall.getAggregation().getOperandTypeInference(),
                 oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
             oldCall.isDistinct(),
@@ -371,17 +376,21 @@ private RexNode reduceStddev(
     final RexNode argRef =
         rexBuilder.ensureType(oldCallType, inputExprs.get(argOrdinal), false);
     final int argRefOrdinal = lookupOrAdd(inputExprs, argRef);
+    final RelDataType sumArgAggRetType = getSumReturnType(
+        rexBuilder.getTypeFactory(), argRef.getType());
 
     final RexNode argSquared = rexBuilder.makeCall(SqlStdOperatorTable.MULTIPLY,
         argRef, argRef);
     final int argSquaredOrdinal = lookupOrAdd(inputExprs, argSquared);
+    final RelDataType sumArgSquaredAggRetType = getSumReturnType(
+        rexBuilder.getTypeFactory(), argSquared.getType());
 
     final AggregateCall sumArgSquaredAggCall =
         createAggregateCallWithBinding(typeFactory,
             new HiveSqlSumAggFunction(
                 oldCall.isDistinct(),
-                oldCall.getAggregation().getReturnTypeInference(),
-                oldCall.getAggregation().getOperandTypeInference(),
+                ReturnTypes.explicit(sumArgSquaredAggRetType),
+                InferTypes.explicit(Collections.singletonList(argSquared.getType())),
                 oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
             argSquared.getType(), oldAggRel, oldCall, argSquaredOrdinal);
 
@@ -397,8 +406,8 @@ private RexNode reduceStddev(
         AggregateCall.create(
             new HiveSqlSumAggFunction(
                 oldCall.isDistinct(),
-                oldCall.getAggregation().getReturnTypeInference(),
-                oldCall.getAggregation().getOperandTypeInference(),
+                ReturnTypes.explicit(sumArgAggRetType),
+                InferTypes.explicit(Collections.singletonList(argRef.getType())),
                 oldCall.getAggregation().getOperandTypeChecker()), //SqlStdOperatorTable.SUM,
             oldCall.isDistinct(),
             oldCall.isApproximate(),
@@ -532,4 +541,23 @@ private RelDataType getFieldType(RelNode relNode, int i) {
         relNode.getRowType().getFieldList().get(i);
     return inputField.getType();
   }
+
+  private RelDataType getSumReturnType(RelDataTypeFactory typeFactory, RelDataType inputType) {
+    switch (inputType.getSqlTypeName()) {
+    case TINYINT:
+    case SMALLINT:
+    case INTEGER:
+    case BIGINT:
+      return TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory);
+    case TIMESTAMP:
+    case FLOAT:
+    case DOUBLE:
+    case VARCHAR:
+    case CHAR:
+      return TypeConverter.convert(TypeInfoFactory.doubleTypeInfo, typeFactory);
+    case DECIMAL:
+      return TypeConverter.convert(TypeInfoFactory.decimalTypeInfo, typeFactory);
+    }
+    return null;
+  }
 }
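For context, `reduceAvg` and `reduceStddev` decompose AVG and STDDEV into SUM/COUNT building blocks, and the fix pins the generated SUM's return type to the type Hive's SUM UDAF actually produces, via the new `getSumReturnType`, instead of reusing the original call's return-type inference. Below is a minimal standalone sketch of the widening table that method encodes, written against Calcite's `SqlTypeName` enum only; the class `SumTypeWidening` and its `main` are illustrative and not part of the patch.

```java
import org.apache.calcite.sql.type.SqlTypeName;

/**
 * Illustrative sketch, not patch code: the widening rule that
 * getSumReturnType encodes, expressed purely over SqlTypeName.
 */
public class SumTypeWidening {

  /** Result type of a SUM over an input of the given type, or null if unhandled. */
  static SqlTypeName sumResultType(SqlTypeName inputType) {
    switch (inputType) {
    case TINYINT:
    case SMALLINT:
    case INTEGER:
    case BIGINT:
      return SqlTypeName.BIGINT;   // integral inputs widen to bigint
    case TIMESTAMP:
    case FLOAT:
    case DOUBLE:
    case VARCHAR:
    case CHAR:
      return SqlTypeName.DOUBLE;   // floating-point and string inputs sum as double
    case DECIMAL:
      return SqlTypeName.DECIMAL;  // decimal stays decimal
    default:
      return null;                 // mirrors the patch: unhandled types yield null
    }
  }

  public static void main(String[] args) {
    System.out.println(sumResultType(SqlTypeName.TINYINT)); // BIGINT
    System.out.println(sumResultType(SqlTypeName.DOUBLE));  // DOUBLE
  }
}
```

The real method instead returns Hive `RelDataType`s built through `TypeConverter` and `TypeInfoFactory`, so the SUM type carries Hive's own semantics (for example the system default decimal precision and scale) rather than a bare Calcite type.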
diff --git a/ql/src/test/queries/clientpositive/materialized_view_rewrite_10.q b/ql/src/test/queries/clientpositive/materialized_view_rewrite_10.q
index 35c58ba0fc..9542792316 100644
--- a/ql/src/test/queries/clientpositive/materialized_view_rewrite_10.q
+++ b/ql/src/test/queries/clientpositive/materialized_view_rewrite_10.q
@@ -60,3 +60,32 @@ select salary, avg(salary) as a
 from emps_n10 where salary > 0 group by salary;
 
 drop materialized view mv1_n10;
+
+-- EXAMPLE 4
+create table emps_n10_2 (
+  empid int,
+  deptno int,
+  name varchar(256),
+  salary tinyint,
+  commission int)
+stored as orc TBLPROPERTIES ('transactional'='true');
+insert into emps_n10_2 values (100, 10, 'Bill', 1, 1000), (200, 20, 'Eric', 2, 500),
+  (150, 10, 'Sebastian', 2, null), (110, 10, 'Theodore', 3, 250), (110, 10, 'Bill', 0, 250);
+analyze table emps_n10_2 compute statistics for columns;
+
+create materialized view mv1_n10 as
+select salary, sum(salary), count(salary) as a
+from emps_n10_2 where salary > 0 group by salary;
+analyze table mv1_n10 compute statistics for columns;
+
+explain
+select avg(salary)
+from emps_n10_2 where salary > 0;
+
+select avg(salary)
+from emps_n10_2 where salary > 0;
+
+drop materialized view mv1_n10;
+
+drop table emps_n10;
+drop table emps_n10_2;
\ No newline at end of file
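The new EXAMPLE 4 deliberately uses the narrowest integral column: `salary` is a `tinyint`, while the materialized view stores `sum(salary)` as `bigint`. For the AVG over the base table to be rewritten onto `mv1_n10`, the SUM generated by the rule must carry the widened type Hive's SUM actually returns; the widening also matters for correctness, since accumulating in the input type can overflow. A contrived, self-contained illustration of that hazard (not part of the patch):

```java
/** Illustrative only: why a SUM over a tinyint column must widen its result type. */
public class TinyintSumOverflow {
  public static void main(String[] args) {
    byte[] salaries = {120, 120, 120};  // tinyint-sized inputs

    byte narrow = 0;
    for (byte s : salaries) {
      narrow += s;                      // wraps around: 360 does not fit in 8 bits
    }

    long widened = 0;
    for (byte s : salaries) {
      widened += s;                     // bigint-style accumulation is safe
    }

    System.out.println("tinyint accumulator: " + narrow);  // 104 (wrapped)
    System.out.println("bigint accumulator:  " + widened); // 360
  }
}
```

Accumulating tinyint/smallint/int/bigint inputs in a bigint matches the first arm of `getSumReturnType` above.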
diff --git a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out
index 6982646f74..2223764999 100644
--- a/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/materialized_view_rewrite_10.q.out
@@ -252,3 +252,175 @@ POSTHOOK: query: drop materialized view mv1_n10
 POSTHOOK: type: DROP_MATERIALIZED_VIEW
 POSTHOOK: Input: default@mv1_n10
 POSTHOOK: Output: default@mv1_n10
+PREHOOK: query: create table emps_n10_2 (
+  empid int,
+  deptno int,
+  name varchar(256),
+  salary tinyint,
+  commission int)
+stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emps_n10_2
+POSTHOOK: query: create table emps_n10_2 (
+  empid int,
+  deptno int,
+  name varchar(256),
+  salary tinyint,
+  commission int)
+stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@emps_n10_2
+PREHOOK: query: insert into emps_n10_2 values (100, 10, 'Bill', 1, 1000), (200, 20, 'Eric', 2, 500),
+  (150, 10, 'Sebastian', 2, null), (110, 10, 'Theodore', 3, 250), (110, 10, 'Bill', 0, 250)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@emps_n10_2
+POSTHOOK: query: insert into emps_n10_2 values (100, 10, 'Bill', 1, 1000), (200, 20, 'Eric', 2, 500),
+  (150, 10, 'Sebastian', 2, null), (110, 10, 'Theodore', 3, 250), (110, 10, 'Bill', 0, 250)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@emps_n10_2
+POSTHOOK: Lineage: emps_n10_2.commission SCRIPT []
+POSTHOOK: Lineage: emps_n10_2.deptno SCRIPT []
+POSTHOOK: Lineage: emps_n10_2.empid SCRIPT []
+POSTHOOK: Lineage: emps_n10_2.name SCRIPT []
+POSTHOOK: Lineage: emps_n10_2.salary SCRIPT []
+PREHOOK: query: analyze table emps_n10_2 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@emps_n10_2
+PREHOOK: Output: default@emps_n10_2
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table emps_n10_2 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@emps_n10_2
+POSTHOOK: Output: default@emps_n10_2
+#### A masked pattern was here ####
+PREHOOK: query: create materialized view mv1_n10 as
+select salary, sum(salary), count(salary) as a
+from emps_n10_2 where salary > 0 group by salary
+PREHOOK: type: CREATE_MATERIALIZED_VIEW
+PREHOOK: Input: default@emps_n10_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@mv1_n10
+POSTHOOK: query: create materialized view mv1_n10 as
+select salary, sum(salary), count(salary) as a
+from emps_n10_2 where salary > 0 group by salary
+POSTHOOK: type: CREATE_MATERIALIZED_VIEW
+POSTHOOK: Input: default@emps_n10_2
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@mv1_n10
+PREHOOK: query: analyze table mv1_n10 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@mv1_n10
+PREHOOK: Output: default@mv1_n10
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table mv1_n10 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@mv1_n10
+POSTHOOK: Output: default@mv1_n10
+#### A masked pattern was here ####
+PREHOOK: query: explain
+select avg(salary)
+from emps_n10_2 where salary > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select avg(salary)
+from emps_n10_2 where salary > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: default.mv1_n10
+                  Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: _c1 (type: bigint), a (type: bigint)
+                    outputColumnNames: _c1, a
+                    Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: sum(_c1), $sum0(a)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order:
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: bigint), _col1 (type: bigint)
+            Execution mode: llap
+            LLAP IO: all inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), $sum0(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: (UDFToDouble(_col0) / _col1) (type: double)
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select avg(salary)
+from emps_n10_2 where salary > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@emps_n10_2
+PREHOOK: Input: default@mv1_n10
+#### A masked pattern was here ####
+POSTHOOK: query: select avg(salary)
+from emps_n10_2 where salary > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@emps_n10_2
+POSTHOOK: Input: default@mv1_n10
+#### A masked pattern was here ####
+2.0
+PREHOOK: query: drop materialized view mv1_n10
+PREHOOK: type: DROP_MATERIALIZED_VIEW
+PREHOOK: Input: default@mv1_n10
+PREHOOK: Output: default@mv1_n10
+POSTHOOK: query: drop materialized view mv1_n10
+POSTHOOK: type: DROP_MATERIALIZED_VIEW
+POSTHOOK: Input: default@mv1_n10
+POSTHOOK: Output: default@mv1_n10
+PREHOOK: query: drop table emps_n10
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@emps_n10
+PREHOOK: Output: default@emps_n10
+POSTHOOK: query: drop table emps_n10
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@emps_n10
+POSTHOOK: Output: default@emps_n10
+PREHOOK: query: drop table emps_n10_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@emps_n10_2
+PREHOOK: Output: default@emps_n10_2
+POSTHOOK: query: drop table emps_n10_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@emps_n10_2
+POSTHOOK: Output: default@emps_n10_2
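As a sanity check on the plan above: the query never scans `emps_n10_2`. It re-aggregates the per-salary partial results stored in `mv1_n10`, computing `sum(_c1)` over the stored sums and `$sum0(a)` over the stored counts (`$sum0` is the SUM variant that returns 0 instead of NULL on empty input), then projects `UDFToDouble(_col0) / _col1`. A standalone sketch of that rollup over the view's three rows (illustrative only, not Hive code):

```java
/**
 * Illustrative only: the rollup the rewritten plan performs over mv1_n10,
 * which stores one row per salary group: (salary, sum(salary), count(salary)).
 */
public class MvAvgRollup {
  public static void main(String[] args) {
    // Rows of mv1_n10 for salaries > 0: salary 1 once, 2 twice, 3 once.
    long[][] mvRows = { {1, 1, 1}, {2, 4, 2}, {3, 3, 1} }; // {salary, sum, count}

    long totalSum = 0;   // sum(_c1) in the plan
    long totalCount = 0; // $sum0(a): sum of the stored counts, 0 if no rows
    for (long[] row : mvRows) {
      totalSum += row[1];
      totalCount += row[2];
    }

    // Select Operator: (UDFToDouble(_col0) / _col1)
    double avg = (double) totalSum / totalCount;
    System.out.println(avg); // 2.0
  }
}
```

The printed value reproduces the `2.0` in the golden output: the qualifying salaries are 1, 2, 2, 3, so the rollup yields 8 / 4.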