diff --git ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index 64efbdd..6d74981 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; @@ -84,6 +85,17 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, // replace the output expression with the input expression so that // parent op can understand this expression ExprNodeDesc exp = op.getColumnExprMap().get(colref.getColumn()); + // if the operator is a groupby and we are referencing the grouping + // id column, we cannot push the predicate + if (op instanceof GroupByOperator) { + GroupByOperator groupBy = (GroupByOperator) op; + if (groupBy.getConf().isGroupingSetsPresent()) { + int groupingSetPlaceholderPos = groupBy.getConf().getKeys().size() - 1; + if (colref.getColumn().equals(groupBy.getSchema().getColumnNames().get(groupingSetPlaceholderPos))) { + exp = null; + } + } + } if (exp == null) { // means that expression can't be pushed either because it is value in // group by diff --git ql/src/test/queries/clientpositive/groupby_grouping_id3.q ql/src/test/queries/clientpositive/groupby_grouping_id3.q new file mode 100644 index 0000000..c6746a8 --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_grouping_id3.q @@ -0,0 +1,22 @@ +CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1; + +set hive.cbo.enable = false; + +-- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + +set hive.cbo.enable = true; + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1; + diff --git ql/src/test/results/clientpositive/groupby_grouping_id3.q.out ql/src/test/results/clientpositive/groupby_grouping_id3.q.out new file mode 100644 index 0000000..c305bfd --- /dev/null +++ ql/src/test/results/clientpositive/groupby_grouping_id3.q.out @@ -0,0 +1,60 @@ +PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: -- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: -- SORT_QUERY_RESULTS + +SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) +FROM T1 +GROUP BY key, value +GROUPING SETS ((), (key)) +HAVING GROUPING__ID = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 1 2 +2 NULL 1 1 +3 NULL 1 2 +4 NULL 1 1