diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index ace0e9cea4b6cd59c085b6f77ddcea54cb0f2d20..2ea5865bfdf5acee9863f3c642feebc691fbb748 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -5128,6 +5128,7 @@ private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, colExprMap); int keyLength = reduceKeys.size(); + int numOfColsRmedFromkey = grpByExprs.size() - keyLength; // add a key for reduce sink if (groupingSetsPresent) { @@ -5157,7 +5158,7 @@ private ReduceSinkOperator genGroupByPlanReduceSinkOperator(QB qb, reduceSinkOutputRowResolver, outputValueColumnNames, reduceValues, colExprMap); } else { // Put partial aggregation results in reduceValues - int inputField = reduceKeys.size(); + int inputField = reduceKeys.size() + numOfColsRmedFromkey; for (Map.Entry entry : aggregationTrees.entrySet()) { diff --git a/ql/src/test/queries/clientpositive/groupby_nullvalues.q b/ql/src/test/queries/clientpositive/groupby_nullvalues.q new file mode 100644 index 0000000000000000000000000000000000000000..85ccb38dbd83879503a3d3de9ace512963ab5241 --- /dev/null +++ b/ql/src/test/queries/clientpositive/groupby_nullvalues.q @@ -0,0 +1,29 @@ +set hive.cbo.enable=false; +drop table if exists paqtest; +CREATE TABLE paqtest( +c1 int, +s1 string, +s2 string, +bn1 bigint) +ROW FORMAT SERDE +'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; + +insert into paqtest values (58, '', 'ABC', 0); + +SELECT +'PM' AS cy, +c1, +NULL AS iused, +NULL AS itp, +s2, +NULL AS cvg, +NULL AS acavg, +sum(bn1) AS cca +FROM paqtest +WHERE (s1 IS NULL OR length(s1) = 0) +GROUP BY 'Pricing mismatch', c1, NULL, NULL, s2, NULL, NULL; +drop table paqtest; diff --git a/ql/src/test/results/clientpositive/groupby_nullvalues.q.out b/ql/src/test/results/clientpositive/groupby_nullvalues.q.out new file mode 100644 index 0000000000000000000000000000000000000000..aae2a1247e7e46ae1ffe7f38da4e522b0e5c65ba --- /dev/null +++ b/ql/src/test/results/clientpositive/groupby_nullvalues.q.out @@ -0,0 +1,83 @@ +PREHOOK: query: drop table if exists paqtest +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists paqtest +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE paqtest( +c1 int, +s1 string, +s2 string, +bn1 bigint) +ROW FORMAT SERDE +'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@paqtest +POSTHOOK: query: CREATE TABLE paqtest( +c1 int, +s1 string, +s2 string, +bn1 bigint) +ROW FORMAT SERDE +'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@paqtest +PREHOOK: query: insert into paqtest values (58, '', 'ABC', 0) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@paqtest +POSTHOOK: query: insert into paqtest values (58, '', 'ABC', 0) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@paqtest +POSTHOOK: Lineage: paqtest.bn1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: paqtest.c1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: paqtest.s1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: paqtest.s2 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: SELECT +'PM' AS cy, +c1, +NULL AS iused, +NULL AS itp, +s2, +NULL AS cvg, +NULL AS acavg, +sum(bn1) AS cca +FROM paqtest +WHERE (s1 IS NULL OR length(s1) = 0) +GROUP BY 'Pricing mismatch', c1, NULL, NULL, s2, NULL, NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@paqtest +#### A masked pattern was here #### +POSTHOOK: query: SELECT +'PM' AS cy, +c1, +NULL AS iused, +NULL AS itp, +s2, +NULL AS cvg, +NULL AS acavg, +sum(bn1) AS cca +FROM paqtest +WHERE (s1 IS NULL OR length(s1) = 0) +GROUP BY 'Pricing mismatch', c1, NULL, NULL, s2, NULL, NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@paqtest +#### A masked pattern was here #### +PM 58 NULL NULL ABC NULL NULL 0 +PREHOOK: query: drop table paqtest +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@paqtest +PREHOOK: Output: default@paqtest +POSTHOOK: query: drop table paqtest +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@paqtest +POSTHOOK: Output: default@paqtest