Index: ivy/libraries.properties
===================================================================
--- ivy/libraries.properties (revision 1393603)
+++ ivy/libraries.properties (working copy)
@@ -64,3 +64,4 @@
slf4j-log4j12.version=1.6.1
velocity.version=1.5
zookeeper.version=3.4.3
+javolution.version=5.5.1
Index: ql/ivy.xml
===================================================================
--- ql/ivy.xml (revision 1393603)
+++ ql/ivy.xml (working copy)
@@ -73,7 +73,8 @@
transitive="false"/>
-
+    <dependency org="javolution" name="javolution" rev="${javolution.version}"
+                transitive="false"/>
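The new dependency backs the javolution.util.FastBitSet import this patch adds to GroupByOperator.java (last hunk below): each grouping set is represented as a bitset over the GROUP BY key positions. A minimal sketch of that representation, assuming only the stock javolution 5.5.1 API; the class and method names here are illustrative, not the patch's:

    import javolution.util.FastBitSet;

    // Illustrative only: decode an integer grouping-set ID into a bitset
    // over the GROUP BY key positions (bit i set => key i participates).
    public class GroupingSetBits {
      public static FastBitSet fromMask(int mask) {
        FastBitSet bits = new FastBitSet();
        for (int pos = 0; mask != 0; mask >>>= 1, pos++) {
          if ((mask & 1) != 0) {
            bits.set(pos);
          }
        }
        return bits;
      }

      public static void main(String[] args) {
        // GROUP BY key, val WITH CUBE uses IDs 0..3:
        System.out.println(fromMask(3).cardinality()); // 2 keys kept: (key, val)
        System.out.println(fromMask(1).cardinality()); // 1 key kept:  (key, NULL)
      }
    }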
Index: ql/src/test/results/clientpositive/groupby_cube1.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_cube1.q.out (revision 0)
+++ ql/src/test/results/clientpositive/groupby_cube1.q.out (working copy)
@@ -0,0 +1,547 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+ORDER BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+NULL 11 1
+NULL 12 1
+NULL 13 1
+NULL 17 1
+NULL 18 1
+NULL 28 1
+1 NULL 1
+1 11 1
+2 NULL 1
+2 12 1
+3 NULL 1
+3 13 1
+7 NULL 1
+7 17 1
+8 NULL 2
+8 18 1
+8 28 1
+PREHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT val)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: '0'
+ type: string
+ expr: val
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL 6
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+ORDER BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+NULL 11 1
+NULL 12 1
+NULL 13 1
+NULL 17 1
+NULL 18 1
+NULL 28 1
+1 NULL 1
+1 11 1
+2 NULL 1
+2 12 1
+3 NULL 1
+3 13 1
+7 NULL 1
+7 17 1
+8 NULL 2
+8 18 1
+8 28 1
+PREHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT val)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: '0'
+ type: string
+ expr: val
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL 6
+1 1
+2 1
+3 1
+7 1
+8 2
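The golden output above shows the wiring: the map-side Group By Operator appends one synthetic constant key (expr: '0') behind the user's keys to carry the grouping-set ID, which is why three key columns (_col0.._col2) plus the aggregate (_col3) flow to the reducer for a two-key query. CUBE expands to all 2^n grouping sets, producing the extra NULL rows in the result. A sketch of the enumeration, illustrative rather than patch code:

    // CUBE over n GROUP BY keys emits one grouping set per bitmask.
    public class CubeEnumeration {
      public static void main(String[] args) {
        int n = 2; // GROUP BY key, val WITH CUBE
        for (int mask = 0; mask < (1 << n); mask++) {
          StringBuilder row = new StringBuilder();
          for (int i = 0; i < n; i++) {
            row.append((mask & (1 << i)) != 0 ? "col" + i : "NULL").append(' ');
          }
          // Prints: NULL NULL / col0 NULL / NULL col1 / col0 col1
          System.out.println(row.toString().trim());
        }
      }
    }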
Index: ql/src/test/results/clientpositive/groupby_rollup1.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_rollup1.q.out (revision 0)
+++ ql/src/test/results/clientpositive/groupby_rollup1.q.out (working copy)
@@ -0,0 +1,535 @@
+PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@T1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+PREHOOK: type: LOAD
+PREHOOK: Output: default@t1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@t1
+PREHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+ORDER BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+1 NULL 1
+1 11 1
+2 NULL 1
+2 12 1
+3 NULL 1
+3 13 1
+7 NULL 1
+7 17 1
+8 NULL 2
+8 18 1
+8 28 1
+PREHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT val)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: '0'
+ type: string
+ expr: val
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL 6
+1 1
+2 1
+3 1
+7 1
+8 2
+PREHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ expr: '0'
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: rand()
+ type: double
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ expr: KEY._col2
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col3
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+ORDER BY key, val
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+ORDER BY key, val
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL NULL 6
+1 NULL 1
+1 11 1
+2 NULL 1
+2 12 1
+3 NULL 1
+3 13 1
+7 NULL 1
+7 17 1
+8 NULL 2
+8 18 1
+8 28 1
+PREHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_TABLE_OR_COL val)))) (TOK_ROLLUP_GROUPBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Alias -> Map Operator Tree:
+ t1
+ TableScan
+ alias: t1
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: val
+ type: string
+ outputColumnNames: key, val
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT val)
+ bucketGroup: false
+ keys:
+ expr: key
+ type: string
+ expr: '0'
+ type: string
+ expr: val
+ type: string
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ expr: _col2
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col3
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:0._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: partials
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+#### A masked pattern was here ####
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: _col0
+ type: string
+ tag: -1
+ value expressions:
+ expr: _col2
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: final
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+
+
+PREHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+ORDER BY key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+ORDER BY key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+#### A masked pattern was here ####
+NULL 6
+1 1
+2 1
+3 1
+7 1
+8 2
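ROLLUP goes through the same synthetic-key plan; only the generated grouping sets differ. Instead of all 2^n subsets it keeps the n+1 key prefixes, which is why rows like "NULL 11" from the cube output are absent above: val is never aggregated without key. Illustrative sketch:

    // ROLLUP over n keys emits only the n+1 prefix grouping sets.
    public class RollupEnumeration {
      public static void main(String[] args) {
        int n = 2; // GROUP BY key, val WITH ROLLUP
        for (int len = n; len >= 0; len--) {
          // masks 3, 1, 0 -> (key, val), (key, NULL), (NULL, NULL)
          int mask = (1 << len) - 1;
          System.out.println(Integer.toBinaryString(mask));
        }
      }
    }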
Index: ql/src/test/results/clientnegative/groupby_cube1.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_cube1.q.out (revision 0)
+++ ql/src/test/results/clientnegative/groupby_cube1.q.out (working copy)
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10209]: Grouping sets aggregations (with rollups or cubes) are not allowed if map-side aggregation is turned off. Set hive.map.aggr=true if you want to use grouping sets
Index: ql/src/test/results/clientnegative/groupby_rollup1.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_rollup1.q.out (revision 0)
+++ ql/src/test/results/clientnegative/groupby_rollup1.q.out (working copy)
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10209]: Grouping sets aggregations (with rollups or cubes) are not allowed if map-side aggregation is turned off. Set hive.map.aggr=true if you want to use grouping sets
Index: ql/src/test/results/clientnegative/groupby_cube2.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_cube2.q.out (revision 0)
+++ ql/src/test/results/clientnegative/groupby_cube2.q.out (working copy)
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10210]: Grouping sets aggregations (with rollups or cubes) are not allowed if aggregation function parameters overlap with the aggregation functions columns
Index: ql/src/test/results/clientnegative/groupby_rollup2.q.out
===================================================================
--- ql/src/test/results/clientnegative/groupby_rollup2.q.out (revision 0)
+++ ql/src/test/results/clientnegative/groupby_rollup2.q.out (working copy)
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10210]: Grouping sets aggregations (with rollups or cubes) are not allowed if aggregation function parameters overlap with the aggregation functions columns
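Taken together, the four negative tests pin down the two new semantic checks. Error 10209 fires because grouping-set expansion happens inside the map-side hash aggregation, so it cannot run with hive.map.aggr=false; error 10210 fires when an aggregation parameter is itself a grouping column, since that column is NULLed out for some grouping sets and the aggregate would see the mangled value. A compilable restatement of the checks; the method and exception choices are mine, not the patch's:

    import java.util.List;

    // Illustrative restatement of the checks behind errors 10209/10210.
    final class GroupingSetsChecks {
      static void validate(boolean mapSideAggr, List<String> groupByCols,
                           List<String> aggrParamCols) {
        if (!mapSideAggr) {
          throw new IllegalStateException(
              "10209: grouping sets require hive.map.aggr=true");
        }
        for (String col : aggrParamCols) {
          if (groupByCols.contains(col)) {
            throw new IllegalStateException(
                "10210: aggregation parameter '" + col
                + "' overlaps with the grouping columns");
          }
        }
      }
    }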
Index: ql/src/test/results/compiler/plan/groupby2.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby2.q.xml (revision 1393603)
+++ ql/src/test/results/compiler/plan/groupby2.q.xml (working copy)
@@ -720,6 +720,9 @@
        <float>0.5</float>
       </void>
+      <void property="groupingSetPosition">
+       <int>1</int>
+      </void>
       <void property="memoryThreshold">
        <float>0.9</float>
Index: ql/src/test/results/compiler/plan/groupby4.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby4.q.xml (revision 1393603)
+++ ql/src/test/results/compiler/plan/groupby4.q.xml (working copy)
@@ -438,6 +438,9 @@
        <float>0.5</float>
       </void>
+      <void property="groupingSetPosition">
+       <int>1</int>
+      </void>
       <void property="memoryThreshold">
        <float>0.9</float>
Index: ql/src/test/results/compiler/plan/groupby6.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby6.q.xml (revision 1393603)
+++ ql/src/test/results/compiler/plan/groupby6.q.xml (working copy)
@@ -438,6 +438,9 @@
        <float>0.5</float>
       </void>
+      <void property="groupingSetPosition">
+       <int>1</int>
+      </void>
       <void property="memoryThreshold">
        <float>0.9</float>
Index: ql/src/test/results/compiler/plan/groupby1.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby1.q.xml (revision 1393603)
+++ ql/src/test/results/compiler/plan/groupby1.q.xml (working copy)
@@ -627,6 +627,9 @@
        <float>0.5</float>
       </void>
+      <void property="groupingSetPosition">
+       <int>1</int>
+      </void>
       <void property="memoryThreshold">
        <float>0.9</float>
Index: ql/src/test/results/compiler/plan/groupby3.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby3.q.xml (revision 1393603)
+++ ql/src/test/results/compiler/plan/groupby3.q.xml (working copy)
@@ -922,6 +922,9 @@
+      <void property="groupingSetPosition">
+       <int>0</int>
+      </void>
       <void property="memoryThreshold">
        <float>0.9</float>
Index: ql/src/test/results/compiler/plan/groupby5.q.xml
===================================================================
--- ql/src/test/results/compiler/plan/groupby5.q.xml (revision 1393603)
+++ ql/src/test/results/compiler/plan/groupby5.q.xml (working copy)
@@ -493,6 +493,9 @@
        <float>0.5</float>
       </void>
+      <void property="groupingSetPosition">
+       <int>1</int>
+      </void>
       <void property="memoryThreshold">
        <float>0.9</float>
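All six plan-XML updates follow mechanically from the descriptor change: Hive stores these golden plans with java.beans.XMLEncoder, so giving GroupByDesc a new non-default bean property adds one <void property=...> block to every stored Group By Operator. A stand-in demonstration (the bean here is a mock, not Hive's class):

    import java.beans.XMLEncoder;
    import java.io.ByteArrayOutputStream;

    public class PlanXmlDemo {
      // Mock descriptor with one int property, default 0.
      public static class Desc {
        private int groupingSetPosition;
        public int getGroupingSetPosition() { return groupingSetPosition; }
        public void setGroupingSetPosition(int p) { groupingSetPosition = p; }
      }

      public static void main(String[] args) {
        Desc d = new Desc();
        d.setGroupingSetPosition(1); // non-default => XMLEncoder emits it
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        XMLEncoder enc = new XMLEncoder(bos);
        enc.writeObject(d);
        enc.close();
        System.out.println(bos); // contains <void property="groupingSetPosition">
      }
    }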
Index: ql/src/test/queries/clientpositive/groupby_rollup1.q
===================================================================
--- ql/src/test/queries/clientpositive/groupby_rollup1.q (revision 0)
+++ ql/src/test/queries/clientpositive/groupby_rollup1.q (working copy)
@@ -0,0 +1,32 @@
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup;
+
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+ORDER BY key, val;
+
+EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup;
+
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+ORDER BY key;
+
+set hive.groupby.skewindata=true;
+
+EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup;
+
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with rollup
+ORDER BY key, val;
+
+EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup;
+
+SELECT key, count(distinct val) FROM T1 GROUP BY key with rollup
+ORDER BY key;
Index: ql/src/test/queries/clientpositive/groupby_cube1.q
===================================================================
--- ql/src/test/queries/clientpositive/groupby_cube1.q (revision 0)
+++ ql/src/test/queries/clientpositive/groupby_cube1.q (working copy)
@@ -0,0 +1,32 @@
+set hive.map.aggr=true;
+set hive.groupby.skewindata=false;
+
+CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE;
+
+LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1;
+
+EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube;
+
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+ORDER BY key, val;
+
+EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube;
+
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+ORDER BY key;
+
+set hive.groupby.skewindata=true;
+
+EXPLAIN
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube;
+
+SELECT key, val, count(1) FROM T1 GROUP BY key, val with cube
+ORDER BY key, val;
+
+EXPLAIN
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube;
+
+SELECT key, count(distinct val) FROM T1 GROUP BY key with cube
+ORDER BY key;
Index: ql/src/test/queries/clientnegative/groupby_cube1.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_cube1.q (revision 0)
+++ ql/src/test/queries/clientnegative/groupby_cube1.q (working copy)
@@ -0,0 +1,4 @@
+set hive.map.aggr=false;
+
+SELECT key, count(distinct value) FROM src GROUP BY key with cube;
+
Index: ql/src/test/queries/clientnegative/groupby_cube2.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_cube2.q (revision 0)
+++ ql/src/test/queries/clientnegative/groupby_cube2.q (working copy)
@@ -0,0 +1,4 @@
+set hive.map.aggr=true;
+
+SELECT key, value, count(distinct value) FROM src GROUP BY key, value with cube;
+
Index: ql/src/test/queries/clientnegative/groupby_rollup1.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_rollup1.q (revision 0)
+++ ql/src/test/queries/clientnegative/groupby_rollup1.q (working copy)
@@ -0,0 +1,4 @@
+set hive.map.aggr=false;
+
+SELECT key, value, count(1) FROM src GROUP BY key, value with rollup;
+
Index: ql/src/test/queries/clientnegative/groupby_rollup2.q
===================================================================
--- ql/src/test/queries/clientnegative/groupby_rollup2.q (revision 0)
+++ ql/src/test/queries/clientnegative/groupby_rollup2.q (working copy)
@@ -0,0 +1,4 @@
+set hive.map.aggr=true;
+
+SELECT key, value, count(key) FROM src GROUP BY key, value with rollup;
+
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (revision 1393603)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (working copy)
@@ -32,6 +32,8 @@
import java.util.Map;
import java.util.Set;
+import javolution.util.FastBitSet;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -40,6 +42,7 @@
import org.apache.hadoop.hive.ql.parse.OpParseContext;
import org.apache.hadoop.hive.ql.plan.AggregationDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -106,7 +109,6 @@
protected transient ArrayList<ObjectInspector> objectInspectors;
transient ArrayList<String> fieldNames;
- transient KeyWrapperFactory keyWrapperFactory;
// Used by sort-based GroupBy: Mode = COMPLETE, PARTIAL1, PARTIAL2,
// MERGEPARTIAL
protected transient KeyWrapper currentKeys;
@@ -144,6 +146,12 @@
private long maxMemory;
private float memoryThreshold;
+ private boolean groupingSetsPresent;
+ private int groupingSetsPosition;
+ private List<Integer> groupingSets;
+ private List<FastBitSet> groupingSetsBitSet;
+ transient private List
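The patch text ends mid-hunk here, but the new fields already sketch the runtime: groupingSets holds the integer grouping-set IDs, groupingSetsBitSet their decoded FastBitSet forms, and groupingSetsPosition locates the synthetic ID key. What follows is my reading of the per-row fan-out those fields imply, assuming a cleared bit means the key is replaced by NULL; it is a sketch, not the patch's code:

    import java.util.ArrayList;
    import java.util.List;
    import javolution.util.FastBitSet;

    class GroupingSetFanOut {
      // For each grouping set, emit a copy of the row's keys with the
      // non-participating positions replaced by NULL.
      static List<Object[]> fanOut(Object[] keys, List<FastBitSet> sets) {
        List<Object[]> out = new ArrayList<Object[]>();
        for (FastBitSet bits : sets) {
          Object[] copy = keys.clone();
          for (int i = 0; i < keys.length; i++) {
            if (!bits.get(i)) {
              copy[i] = null; // key aggregated away in this grouping set
            }
          }
          out.add(copy);
        }
        return out;
      }
    }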