Index: data/files/groupby_groupingid.txt =================================================================== --- data/files/groupby_groupingid.txt (revision 0) +++ data/files/groupby_groupingid.txt (working copy) @@ -0,0 +1,6 @@ +1NULL +11 +22 +33 +3NULL +45 Index: ql/src/test/results/clientpositive/groupby_grouping_id2.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_grouping_id2.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_grouping_id2.q.out (working copy) @@ -0,0 +1,225 @@ +PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1 +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@t1 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 6 +1 NULL 1 2 +1 NULL 3 1 +1 1 3 1 +2 NULL 1 1 +2 2 3 1 +3 NULL 1 2 +3 NULL 3 1 +3 3 3 1 +4 NULL 1 1 +4 5 3 1 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 6 +1 NULL 1 2 +1 NULL 3 1 +1 1 3 1 +2 NULL 1 1 +2 2 3 1 +3 NULL 1 2 +3 NULL 3 1 +3 3 3 1 +4 NULL 1 1 +4 5 3 1 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 1 +1 4 +3 6 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 Index: ql/src/test/results/clientpositive/groupby_grouping_id1.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_grouping_id1.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_grouping_id1.q.out (working copy) @@ -0,0 +1,83 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@t1 +PREHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 +NULL 11 2 +NULL 12 2 +NULL 13 2 +NULL 17 2 +NULL 18 2 +NULL 28 2 +1 NULL 1 +1 11 3 +2 NULL 1 +2 12 3 +3 NULL 1 +3 13 3 +7 NULL 1 +7 17 3 +8 NULL 1 +8 18 3 +8 28 3 +PREHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, key, val from T1 group by key, val with rollup +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 NULL NULL +1 1 NULL +3 1 11 +1 2 NULL +3 2 12 +1 3 NULL +3 3 13 +1 7 NULL +3 7 17 +1 8 NULL +3 8 18 +3 8 28 +PREHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 0 +NULL 11 2 2 +NULL 12 2 2 +NULL 13 2 2 +NULL 17 2 2 +NULL 18 2 2 +NULL 28 2 2 +1 NULL 1 1 +1 11 3 3 +2 NULL 1 1 +2 12 3 3 +3 NULL 1 1 +3 13 3 3 +7 NULL 1 1 +7 17 3 3 +8 NULL 1 1 +8 18 3 3 +8 28 3 3 Index: ql/src/test/results/clientnegative/groupby_grouping_id1.q.out =================================================================== --- ql/src/test/results/clientnegative/groupby_grouping_id1.q.out (revision 0) +++ ql/src/test/results/clientnegative/groupby_grouping_id1.q.out (working copy) @@ -0,0 +1,6 @@ +PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@T1 +FAILED: SemanticException [Error 10004]: Line 3:7 Invalid table alias or column reference 'GROUPING__ID': (possible column names are: key, val) Index: ql/src/test/queries/clientpositive/groupby_grouping_id2.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_grouping_id2.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_grouping_id2.q (working copy) @@ -0,0 +1,41 @@ +CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../data/files/groupby_groupingid.txt' INTO TABLE T1; + +set hive.groupby.skewindata = true; + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + + + + +set hive.groupby.skewindata = false; + +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP; + +SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID; + +SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID; + + Index: ql/src/test/queries/clientpositive/groupby_grouping_id1.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_grouping_id1.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_grouping_id1.q (working copy) @@ -0,0 +1,10 @@ +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1; + +SELECT key, val, GROUPING__ID from T1 group by key, val with cube; + +SELECT GROUPING__ID, key, val from T1 group by key, val with rollup; + +SELECT key, val, GROUPING__ID, CASE WHEN GROUPING__ID == 0 THEN "0" WHEN GROUPING__ID == 1 THEN "1" WHEN GROUPING__ID == 2 THEN "2" WHEN GROUPING__ID == 3 THEN "3" ELSE "nothing" END from T1 group by key, val with cube; + Index: ql/src/test/queries/clientnegative/groupby_grouping_id1.q =================================================================== --- ql/src/test/queries/clientnegative/groupby_grouping_id1.q (revision 0) +++ ql/src/test/queries/clientnegative/groupby_grouping_id1.q (working copy) @@ -0,0 +1,4 @@ +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE; + +SELECT GROUPING__ID FROM T1; + Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java (revision 1407360) +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java (working copy) @@ -37,7 +37,17 @@ public static VirtualColumn RAWDATASIZE = new VirtualColumn("RAW__DATA__SIZE", (PrimitiveTypeInfo)TypeInfoFactory.longTypeInfo); + /** + * GROUPINGID is used with GROUP BY GROUPINGS SETS, ROLLUP and CUBE. + * It composes a bit vector with the "0" and "1" values for every + * column which is GROUP BY section. "1" is for a row in the result + * set if that column has been aggregated in that row. Otherwise the + * value is "0". Returns the decimal representation of the bit vector. + */ + public static VirtualColumn GROUPINGID = + new VirtualColumn("GROUPING__ID", (PrimitiveTypeInfo) TypeInfoFactory.intTypeInfo); + private String name; private PrimitiveTypeInfo typeInfo; private boolean isHidden = true; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1407360) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -221,8 +221,6 @@ //Max characters when auto generating the column name with func name private static final int AUTOGEN_COLALIAS_PRFX_MAXLENGTH = 20; - private static final String GROUPING_SET_KEY = "GROUPING_SET_KEY"; - private static class Phase1Ctx { String dest; int nextNum; @@ -2732,15 +2730,19 @@ Map colExprMap) throws SemanticException { // For grouping sets, add a dummy grouping key String groupingSetColumnName = - groupByInputRowResolver.get(null, GROUPING_SET_KEY).getInternalName(); + groupByInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, groupingSetColumnName, null, false); groupByKeys.add(inputExpr); String field = getColumnInternalName(groupByKeys.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.put(null, GROUPING_SET_KEY, - new ColumnInfo(field, TypeInfoFactory.stringTypeInfo, "", false)); + groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), + new ColumnInfo( + field, + TypeInfoFactory.stringTypeInfo, + null, + true)); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -2759,7 +2761,7 @@ Map colExprMap) throws SemanticException { // add a key for reduce sink String groupingSetColumnName = - reduceSinkInputRowResolver.get(null, GROUPING_SET_KEY).getInternalName(); + reduceSinkInputRowResolver.get(null, VirtualColumn.GROUPINGID.getName()).getInternalName(); ExprNodeDesc inputExpr = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, groupingSetColumnName, null, false); reduceKeys.add(inputExpr); @@ -2768,8 +2770,8 @@ String field = Utilities.ReduceField.KEY.toString() + "." + getColumnInternalName(reduceKeys.size() - 1); ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( - reduceKeys.size() - 1).getTypeInfo(), null, false); - reduceSinkOutputRowResolver.put(null, GROUPING_SET_KEY, colInfo); + reduceKeys.size() - 1).getTypeInfo(), null, true); + reduceSinkOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), colInfo); colExprMap.put(colInfo.getInternalName(), inputExpr); } @@ -3016,8 +3018,12 @@ groupByKeys.add(constant); String field = getColumnInternalName(groupByKeys.size() - 1); outputColumnNames.add(field); - groupByOutputRowResolver.put(null, GROUPING_SET_KEY, - new ColumnInfo(field, TypeInfoFactory.stringTypeInfo, "", false)); + groupByOutputRowResolver.put(null, VirtualColumn.GROUPINGID.getName(), + new ColumnInfo( + field, + TypeInfoFactory.stringTypeInfo, + null, + true)); colExprMap.put(field, constant); }