diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 6528ec6..3496411 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -855,6 +855,7 @@ minillaplocal.query.files=\ optimize_nullscan.q,\ parquet_types.q,\ groupby_grouping_id2.q,\ + groupby_grouping_sets_pushdown1.q,\ constprog_semijoin.q,\ ppd_union_view.q,\ smb_mapjoin_19.q,\ @@ -1140,6 +1141,7 @@ spark.query.files=add_part_multiple.q, \ groupby_complex_types_multi_single_reducer.q, \ groupby_cube1.q, \ groupby_grouping_id2.q, \ + groupby_grouping_sets_pushdown1.q, \ groupby_map_ppr.q, \ groupby_map_ppr_multi_distinct.q, \ groupby_multi_insert_common_distinct.q, \ diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index b01a9ba..d71e48a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -95,6 +95,26 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, exp = null; } } + + // if grouping set is given (i.e. group by with cube) and column is + // missed in some grouping set, we cannot push the predicate + // since the column value can be null + if (groupBy.getConf().getListGroupingSets() != null) { + List groupingIds = groupBy.getConf().getListGroupingSets(); + int groupKeyIndex = groupBy.getConf().getKeys().indexOf(exp); + if (groupKeyIndex >= 0) { + boolean hasMissedGroup = false; + for (long groupingId: groupingIds) { + if ((groupingId & (1L << groupKeyIndex)) == 0) { + hasMissedGroup = true; + break; + } + } + if (hasMissedGroup) { + exp = null; + } + } + } } if (exp == null) { // means that expression can't be pushed either because it is value in diff --git ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q 
ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q new file mode 100644 index 0000000..c0e349c --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_sets_pushdown1.q @@ -0,0 +1,20 @@ +SET hive.cbo.enable=false; + +CREATE TABLE T1(a STRING, b STRING, s BIGINT); +INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456); + +-- should not push down; otherwise the filter 'a IS NOT NULL' would have no effect +-- SORT_QUERY_RESULTS +SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b)) +) t WHERE a IS NOT NULL; + +-- should push down +-- SORT_QUERY_RESULTS +SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((a), (a, b)) +) t WHERE a IS NOT NULL; diff --git ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out new file mode 100644 index 0000000..63c7fb4 --- /dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_sets_pushdown1.q.out @@ -0,0 +1,55 @@ +PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, s BIGINT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t1 +POSTHOOK: query: INSERT OVERWRITE TABLE T1 VALUES ('aaa', 'bbb', 123456) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.a SCRIPT [] +POSTHOOK: Lineage: t1.b SCRIPT [] +POSTHOOK: Lineage: t1.s SCRIPT [] +PREHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b)) +) t WHERE a IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: 
Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((), (a), (b), (a, b)) +) t WHERE a IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +aaa NULL 123456 +aaa bbb 123456 +PREHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((a), (a, b)) +) t WHERE a IS NOT NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM ( + SELECT a, b, sum(s) + FROM T1 + GROUP BY a, b GROUPING SETS ((a), (a, b)) +) t WHERE a IS NOT NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +aaa NULL 123456 +aaa bbb 123456