Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java (revision 1183502) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectsEqualComparer.java (working copy) @@ -175,4 +175,25 @@ } return true; } + + public int firstDifference(Object[] ol0, Object[] ol1) { + if (ol0.length != numFields || ol1.length != numFields) { + assert (ol0.length <= numFields); + assert (ol1.length <= numFields); + int min = Math.min(ol0.length, ol1.length); + for (int i = 0; i < min; i++) { + if (!fieldComparers[i].areEqual(ol0[i], ol1[i])) { + return i; + } + } + return min; + } + + for (int i = 0; i < numFields; i++) { + if (!fieldComparers[i].areEqual(ol0[i], ol1[i])) { + return i; + } + } + return numFields; + } } Index: ql/src/test/results/clientpositive/groupby1_limit_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_limit_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_limit_withrollup.q.out (revision 0) @@ -0,0 +1,150 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP LIMIT 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT 
(TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL src) key)) (TOK_LIMIT 5))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-12-40_017_658592696422092735/-mr-10002 + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: double + Reduce Operator Tree: + Extract + Limit + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: 
_col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-12-53_125_5509942651643665754/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-12-53_125_5509942651643665754/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 
+0 0.0 +10 10.0 +100 200.0 +103 206.0 Index: ql/src/test/results/clientpositive/groupby_ppr_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_ppr_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_ppr_withrollup.q.out (revision 0) @@ -0,0 +1,338 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + tag: -1 + Needs Tagging: false + Path -> Alias: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [src] + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [src] + Path -> Partition: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295772 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295771 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295772 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295771 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + expr: sum(KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-02_213_5748373876327124284/-mr-10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string,string,bigint,double + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-02_213_5748373876327124284/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Needs Tagging: false + Path -> Alias: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-02_213_5748373876327124284/-mr-10002 
[file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-02_213_5748373876327124284/-mr-10002] + Path -> Partition: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-02_213_5748373876327124284/-mr-10002 + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string,string,bigint,double + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string,string,bigint,double + escape.delim \ + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_11-19-02_213_5748373876327124284/-ext-10000 + NumFilesPerFileSink: 1 + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_11-19-02_213_5748373876327124284/-ext-10000/ + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2 
+ columns.types string:int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314296342 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_11-19-02_213_5748373876327124284/-ext-10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2 + columns.types string:int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314296342 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + tmp directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_11-19-02_213_5748373876327124284/-ext-10001 + + Stage: Stage-3 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_11-19-02_213_5748373876327124284/-ext-10000/ + + +PREHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE 
dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-14_673_7964906477338629918/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-19-14_673_7964906477338629918/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION 
[(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL +0 1 00.0 +1 71 132828.0 +2 69 251142.0 +3 62 364008.0 +4 74 4105526.0 +5 6 5794.0 +6 5 6796.0 +7 6 71470.0 +8 8 81524.0 +9 7 92094.0 Index: ql/src/test/results/clientpositive/groupby2_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_withrollup.q.out (revision 0) @@ -0,0 +1,215 @@ +PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + expr: sum(KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-21_349_6780822501784053068/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + 
GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-21_349_6780822501784053068/-mr-10003 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-4 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) 
WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g2.* FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-36_642_5682062865334495000/-mr-10000 +POSTHOOK: query: SELECT dest_g2.* FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-36_642_5682062865334495000/-mr-10000 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 Index: ql/src/test/results/clientpositive/groupby7_map_withrollup.q.out =================================================================== --- 
ql/src/test/results/clientpositive/groupby7_map_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby7_map_withrollup.q.out (revision 0) @@ -0,0 +1,859 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-06_076_2445993225668544464/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE 
DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-21_084_6317718038390298457/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-21_084_6317718038390298457/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 
130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 
+369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-21_235_932971373288944353/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-21_235_932971373288944353/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 
+333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby8_noskew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby8_noskew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby8_noskew_withrollup.q.out (revision 0) @@ -0,0 +1,915 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE 
+POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + tag: -1 + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-15-40_175_3005655529156092163/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator 
Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-15-40_175_3005655529156092163/-mr-10005 + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-15-40_175_3005655529156092163/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 
SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-04_893_9116636934870253035/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-04_893_9116636934870253035/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 
+191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_060_8026050833820839180/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_060_8026050833820839180/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 
1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 Index: ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct_withrollup.q.out (revision 0) @@ -0,0 +1,309 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) 
+WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + expr: sum(substr(value, 5)) + expr: sum(DISTINCT substr(value, 5)) + expr: count(DISTINCT value) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + expr: _col4 + type: double + expr: _col5 + type: double + expr: _col6 
+ type: bigint + Needs Tagging: false + Path -> Alias: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [src] + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [src] + Path -> Partition: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060551 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060550 + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060551 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060550 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + expr: 
sum(DISTINCT KEY._col1:1._col0) + expr: count(DISTINCT KEY._col1:2._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + expr: _col3 + type: double + expr: _col4 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + expr: UDFToInteger(_col3) + type: int + expr: UDFToInteger(_col4) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-36_028_1912622880990238458/-ext-10000 + NumFilesPerFileSink: 1 + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-36_028_1912622880990238458/-ext-10000/ + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2,c3,c4 + columns.types string:int:string:int:int + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060936 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator 
+ tables: + replace: true + source: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-36_028_1912622880990238458/-ext-10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2,c3,c4 + columns.types string:int:string:int:int + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060936 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + tmp directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-36_028_1912622880990238458/-ext-10001 + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-36_028_1912622880990238458/-ext-10000/ + + +PREHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 
5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-55-43_653_1757451209542462510/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-55-43_653_1757451209542462510/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 
309 NULL 79136 309 +0 1 00.0 0 1 +1 71 132828.0 10044 71 +2 69 251142.0 15780 69 +3 62 364008.0 20119 62 +4 74 4105526.0 30965 74 +5 6 5794.0 278 6 +6 5 6796.0 331 5 +7 6 71470.0 447 6 +8 8 81524.0 595 8 +9 7 92094.0 577 7 Index: ql/src/test/results/clientpositive/groupby_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_withrollup.q.out (revision 0) @@ -0,0 +1,2557 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, key2 INT, value STRING, value2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, key2 INT, value STRING, value2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, key2 INT, value STRING, value2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, key2 INT, value STRING, value2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + 
SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. 
(TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + expr: count(substr(value, length(value), 1)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + expr: count(DISTINCT substr(value, length(value), 1)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: 
string + expr: substr(value, length(value), 1) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-06-47_076_6311729349243006989/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + 
File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-06-47_076_6311729349243006989/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + tag: -1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-06-47_076_6311729349243006989/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + 
expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + 
sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: 
dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-13_336_2833048230417908007/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-13_336_2833048230417908007/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 500 +0 NULL 0.0 3 +0 0 0.0 3 +1 NULL 16414.0 115 +1 0 1300.0 10 +1 1 816.0 6 +1 2 1026.0 8 +1 3 1786.0 12 +1 4 1460.0 10 +1 5 1755.0 13 +1 6 1726.0 11 +1 7 2081.0 13 +1 8 1712.0 14 +1 9 2752.0 18 +2 NULL 25571.0 111 +2 0 2390.0 11 +2 1 1737.0 7 +2 2 2824.0 12 +2 3 3109.0 13 +2 4 1512.0 8 +2 5 2245.0 9 +2 6 2070.0 10 +2 7 3248.0 14 +2 8 4106.0 17 +2 9 2330.0 10 +3 NULL 32004.0 99 +3 0 700.0 3 +3 1 2929.0 9 +3 2 3480.0 10 +3 3 2494.0 8 +3 4 3006.0 9 +3 5 3585.0 13 +3 6 3886.0 11 +3 7 4208.0 14 +3 8 4086.0 12 +3 9 3630.0 10 +4 NULL 
52763.0 124 +4 0 4550.0 10 +4 1 5143.0 13 +4 2 4232.0 11 +4 3 4876.0 12 +4 4 5316.0 14 +4 5 2345.0 5 +4 6 4400.0 10 +4 7 4954.0 12 +4 8 7418.0 16 +4 9 9529.0 21 +5 NULL 397.0 10 +5 1 102.0 2 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 3 +5 7 57.0 1 +5 8 116.0 2 +6 NULL 398.0 6 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 2 +6 9 69.0 1 +7 NULL 735.0 10 +7 0 210.0 3 +7 2 144.0 2 +7 4 74.0 1 +7 6 152.0 2 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 10 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 2 +8 4 168.0 2 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 12 +9 0 270.0 3 +9 2 92.0 1 +9 5 190.0 2 +9 6 96.0 1 +9 7 194.0 2 +9 8 196.0 2 +9 9 9.0 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-13_516_8995768525875670621/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-13_516_8995768525875670621/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 
NULL 130091.0 10 +0 NULL 0.0 1 +0 0 0.0 1 +1 NULL 16414.0 10 +1 0 1300.0 1 +1 1 816.0 1 +1 2 1026.0 1 +1 3 1786.0 1 +1 4 1460.0 1 +1 5 1755.0 1 +1 6 1726.0 1 +1 7 2081.0 1 +1 8 1712.0 1 +1 9 2752.0 1 +2 NULL 25571.0 10 +2 0 2390.0 1 +2 1 1737.0 1 +2 2 2824.0 1 +2 3 3109.0 1 +2 4 1512.0 1 +2 5 2245.0 1 +2 6 2070.0 1 +2 7 3248.0 1 +2 8 4106.0 1 +2 9 2330.0 1 +3 NULL 32004.0 10 +3 0 700.0 1 +3 1 2929.0 1 +3 2 3480.0 1 +3 3 2494.0 1 +3 4 3006.0 1 +3 5 3585.0 1 +3 6 3886.0 1 +3 7 4208.0 1 +3 8 4086.0 1 +3 9 3630.0 1 +4 NULL 52763.0 10 +4 0 4550.0 1 +4 1 5143.0 1 +4 2 4232.0 1 +4 3 4876.0 1 +4 4 5316.0 1 +4 5 2345.0 1 +4 6 4400.0 1 +4 7 4954.0 1 +4 8 7418.0 1 +4 9 9529.0 1 +5 NULL 397.0 6 +5 1 102.0 1 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 1 +5 7 57.0 1 +5 8 116.0 1 +6 NULL 398.0 5 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 1 +6 9 69.0 1 +7 NULL 735.0 6 +7 0 210.0 1 +7 2 144.0 1 +7 4 74.0 1 +7 6 152.0 1 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 8 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 1 +8 4 168.0 1 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 7 +9 0 270.0 1 +9 2 92.0 1 +9 5 190.0 1 +9 6 96.0 1 +9 7 194.0 1 +9 8 196.0 1 +9 9 9.0 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE 
TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. 
(TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. 
(TOK_TABLE_OR_COL SRC) key))) 1)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + expr: count(substr(value, length(value), 1)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + expr: count(DISTINCT substr(value, length(value), 1)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + expr: substr(value, length(value), 1) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + 
mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-13_686_2135741946140686122/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + 
expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + 
sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_251_8521162492149444803/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_251_8521162492149444803/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 500 +0 NULL 0.0 3 +0 0 0.0 3 +1 NULL 16414.0 115 +1 0 1300.0 10 +1 1 816.0 6 +1 2 1026.0 8 +1 3 1786.0 12 +1 4 1460.0 10 +1 5 1755.0 13 +1 6 1726.0 11 +1 7 2081.0 13 +1 8 1712.0 14 +1 9 2752.0 18 +2 NULL 25571.0 111 +2 0 2390.0 11 +2 1 1737.0 7 +2 2 2824.0 12 +2 3 3109.0 13 +2 4 1512.0 8 +2 5 2245.0 9 +2 6 2070.0 10 +2 7 3248.0 14 +2 8 4106.0 17 +2 9 2330.0 10 +3 NULL 32004.0 99 +3 0 700.0 3 +3 1 2929.0 9 +3 2 3480.0 10 +3 3 2494.0 8 +3 4 3006.0 9 +3 5 3585.0 13 +3 6 3886.0 11 +3 7 4208.0 14 +3 8 4086.0 12 +3 9 3630.0 10 +4 NULL 52763.0 124 +4 0 4550.0 10 +4 1 5143.0 13 +4 2 4232.0 11 +4 3 4876.0 12 +4 4 5316.0 14 +4 5 2345.0 5 +4 6 4400.0 10 +4 7 4954.0 12 +4 8 7418.0 16 +4 9 9529.0 21 +5 NULL 397.0 10 +5 1 102.0 2 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 3 +5 7 57.0 1 +5 8 116.0 2 +6 NULL 398.0 6 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 2 +6 9 69.0 1 +7 NULL 735.0 10 +7 0 210.0 3 +7 2 144.0 2 +7 4 74.0 1 +7 6 152.0 2 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 10 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 2 +8 4 168.0 2 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 12 +9 0 270.0 3 +9 2 92.0 1 +9 5 190.0 2 +9 6 96.0 1 +9 7 194.0 2 +9 8 196.0 2 +9 9 9.0 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_415_9094374995817645187/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_415_9094374995817645187/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 10 +0 NULL 0.0 1 +0 0 0.0 1 +1 NULL 16414.0 10 +1 0 1300.0 1 +1 1 816.0 1 +1 2 1026.0 1 +1 3 1786.0 1 +1 4 1460.0 1 +1 5 1755.0 1 +1 6 1726.0 1 +1 7 2081.0 1 +1 8 1712.0 1 +1 9 2752.0 1 +2 NULL 25571.0 10 +2 0 2390.0 1 +2 1 1737.0 1 +2 2 2824.0 1 +2 3 3109.0 1 +2 4 1512.0 1 +2 5 2245.0 1 +2 6 2070.0 1 +2 7 3248.0 1 +2 8 4106.0 1 +2 9 2330.0 1 +3 NULL 32004.0 10 +3 0 700.0 1 +3 1 2929.0 1 +3 2 3480.0 1 +3 3 2494.0 1 +3 4 3006.0 1 +3 5 3585.0 1 +3 6 3886.0 1 +3 7 4208.0 1 +3 8 4086.0 1 +3 9 3630.0 1 +4 NULL 52763.0 10 +4 0 4550.0 1 +4 1 5143.0 1 +4 2 4232.0 1 +4 3 4876.0 1 +4 4 5316.0 1 +4 5 2345.0 1 +4 6 4400.0 1 +4 7 4954.0 1 +4 8 7418.0 1 +4 9 9529.0 1 +5 NULL 397.0 6 +5 1 102.0 1 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 1 +5 7 57.0 1 +5 8 116.0 1 +6 NULL 398.0 5 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 1 +6 9 69.0 1 +7 NULL 735.0 6 +7 0 210.0 1 +7 2 144.0 1 +7 4 74.0 1 +7 6 152.0 1 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 8 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 1 +8 4 168.0 1 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 7 +9 0 270.0 1 +9 2 92.0 1 +9 5 190.0 1 +9 6 96.0 1 +9 7 194.0 1 +9 8 196.0 1 +9 9 9.0 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), 
LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. 
(TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + sort order: ++ + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + expr: substr(value, length(value), 1) + type: string + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + 
compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_584_8344644518065507236/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map 
Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_584_8344644518065507236/-mr-10005 + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + expr: substr(value, length(value), 1) + type: string + sort order: +++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + expr: substr(value, length(value), 1) + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(DISTINCT KEY._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + expr: KEY._col2 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_584_8344644518065507236/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + tag: -1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3 + File Output 
Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-29_584_8344644518065507236/-mr-10007 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-8 + Stats-Aggr Operator + + +PREHOOK: 
query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-56_718_1758950165316497394/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-56_718_1758950165316497394/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 500 +0 NULL 0.0 3 +0 0 0.0 3 +1 NULL 16414.0 115 +1 0 1300.0 10 +1 1 816.0 6 +1 2 1026.0 8 +1 3 1786.0 12 +1 4 1460.0 10 +1 5 1755.0 13 +1 6 1726.0 11 +1 7 2081.0 13 +1 8 1712.0 14 +1 9 2752.0 18 +2 NULL 25571.0 111 +2 0 2390.0 11 +2 1 1737.0 7 +2 2 2824.0 12 +2 3 3109.0 13 +2 4 1512.0 8 +2 5 2245.0 9 +2 6 2070.0 10 +2 7 3248.0 14 +2 8 4106.0 17 +2 9 2330.0 10 +3 NULL 32004.0 99 +3 0 700.0 3 +3 1 2929.0 9 +3 2 3480.0 10 +3 3 2494.0 8 +3 4 3006.0 9 +3 5 3585.0 13 +3 6 3886.0 11 +3 7 4208.0 14 +3 
8 4086.0 12 +3 9 3630.0 10 +4 NULL 52763.0 124 +4 0 4550.0 10 +4 1 5143.0 13 +4 2 4232.0 11 +4 3 4876.0 12 +4 4 5316.0 14 +4 5 2345.0 5 +4 6 4400.0 10 +4 7 4954.0 12 +4 8 7418.0 16 +4 9 9529.0 21 +5 NULL 397.0 10 +5 1 102.0 2 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 3 +5 7 57.0 1 +5 8 116.0 2 +6 NULL 398.0 6 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 2 +6 9 69.0 1 +7 NULL 735.0 10 +7 0 210.0 3 +7 2 144.0 2 +7 4 74.0 1 +7 6 152.0 2 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 10 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 2 +8 4 168.0 2 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 12 +9 0 270.0 3 +9 2 92.0 1 +9 5 190.0 2 +9 6 96.0 1 +9 7 194.0 2 +9 8 196.0 2 +9 9 9.0 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-57_049_8422054283058343431/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-57_049_8422054283058343431/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 10 +0 NULL 0.0 1 +0 0 0.0 1 +1 NULL 16414.0 10 +1 0 1300.0 1 +1 1 816.0 1 +1 2 1026.0 1 +1 3 1786.0 1 +1 4 1460.0 1 +1 5 1755.0 1 +1 6 1726.0 1 +1 7 2081.0 1 +1 8 1712.0 1 +1 9 2752.0 1 +2 NULL 25571.0 10 +2 0 
2390.0 1 +2 1 1737.0 1 +2 2 2824.0 1 +2 3 3109.0 1 +2 4 1512.0 1 +2 5 2245.0 1 +2 6 2070.0 1 +2 7 3248.0 1 +2 8 4106.0 1 +2 9 2330.0 1 +3 NULL 32004.0 10 +3 0 700.0 1 +3 1 2929.0 1 +3 2 3480.0 1 +3 3 2494.0 1 +3 4 3006.0 1 +3 5 3585.0 1 +3 6 3886.0 1 +3 7 4208.0 1 +3 8 4086.0 1 +3 9 3630.0 1 +4 NULL 52763.0 10 +4 0 4550.0 1 +4 1 5143.0 1 +4 2 4232.0 1 +4 3 4876.0 1 +4 4 5316.0 1 +4 5 2345.0 1 +4 6 4400.0 1 +4 7 4954.0 1 +4 8 7418.0 1 +4 9 9529.0 1 +5 NULL 397.0 6 +5 1 102.0 1 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 1 +5 7 57.0 1 +5 8 116.0 1 +6 NULL 398.0 5 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 1 +6 9 69.0 1 +7 NULL 735.0 6 +7 0 210.0 1 +7 2 144.0 1 +7 4 74.0 1 +7 6 152.0 1 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 8 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 1 +8 4 168.0 1 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 7 +9 0 270.0 1 +9 2 92.0 1 +9 5 190.0 1 +9 6 96.0 1 +9 7 194.0 1 +9 8 196.0 1 +9 9 9.0 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + 
SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. 
(TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1)) (TOK_SELEXPR (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) (TOK_FUNCTION LENGTH (. (TOK_TABLE_OR_COL SRC) value)) 1)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) 1 1) (TOK_FUNCTION SUBSTR (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key)) (TOK_FUNCTION LENGTH (TOK_FUNCTION TOK_STRING (. (TOK_TABLE_OR_COL SRC) key))) 1)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + expr: substr(value, length(value), 1) + type: string + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + 
outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-57_204_6160687393458983995/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-57_204_6160687393458983995/-mr-10005 + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + expr: substr(value, length(value), 1) + type: string + sort order: +++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + expr: substr(key, length(key), 1) + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(DISTINCT KEY._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + expr: KEY._col2 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-07-57_204_6160687393458983995/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce 
partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: double + expr: _col4 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: double + expr: _col3 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), 
LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), 
] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-08-22_440_2017600598400929563/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-08-22_440_2017600598400929563/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 500 +0 NULL 0.0 3 +0 0 0.0 3 +1 NULL 16414.0 115 +1 0 1300.0 10 +1 1 816.0 6 +1 2 1026.0 8 +1 3 1786.0 12 +1 4 1460.0 10 +1 5 1755.0 13 +1 6 1726.0 11 +1 7 2081.0 13 +1 8 1712.0 14 +1 9 2752.0 18 +2 NULL 25571.0 111 +2 0 2390.0 11 +2 1 1737.0 7 +2 2 2824.0 12 +2 3 3109.0 13 +2 4 1512.0 8 +2 5 2245.0 9 +2 6 2070.0 10 +2 7 3248.0 14 +2 8 4106.0 17 +2 9 2330.0 10 +3 NULL 32004.0 99 +3 0 700.0 3 +3 1 2929.0 9 +3 2 3480.0 10 +3 3 2494.0 8 +3 4 3006.0 9 +3 5 3585.0 13 +3 6 3886.0 11 +3 7 4208.0 14 +3 8 4086.0 12 +3 9 3630.0 10 +4 NULL 52763.0 124 +4 0 4550.0 10 +4 1 5143.0 13 +4 2 4232.0 11 +4 3 4876.0 12 +4 4 5316.0 14 +4 5 2345.0 5 +4 6 4400.0 10 +4 7 4954.0 12 +4 8 7418.0 16 +4 9 9529.0 21 +5 NULL 397.0 10 +5 1 102.0 2 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 3 +5 7 57.0 1 +5 8 116.0 2 +6 NULL 398.0 6 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 2 +6 9 69.0 1 +7 NULL 735.0 10 +7 0 210.0 3 +7 2 144.0 2 +7 4 74.0 1 +7 6 152.0 2 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 10 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 2 +8 4 168.0 2 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 12 +9 0 270.0 3 +9 2 92.0 1 +9 5 190.0 2 +9 6 96.0 1 +9 7 194.0 2 +9 8 196.0 2 +9 9 9.0 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: 
Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-08-22_598_6269156541386184570/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-08-22_598_6269156541386184570/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 130091.0 10 +0 NULL 0.0 1 +0 0 0.0 1 +1 NULL 16414.0 10 +1 0 1300.0 1 +1 1 816.0 1 +1 2 1026.0 1 +1 3 1786.0 1 +1 4 
1460.0 1 +1 5 1755.0 1 +1 6 1726.0 1 +1 7 2081.0 1 +1 8 1712.0 1 +1 9 2752.0 1 +2 NULL 25571.0 10 +2 0 2390.0 1 +2 1 1737.0 1 +2 2 2824.0 1 +2 3 3109.0 1 +2 4 1512.0 1 +2 5 2245.0 1 +2 6 2070.0 1 +2 7 3248.0 1 +2 8 4106.0 1 +2 9 2330.0 1 +3 NULL 32004.0 10 +3 0 700.0 1 +3 1 2929.0 1 +3 2 3480.0 1 +3 3 2494.0 1 +3 4 3006.0 1 +3 5 3585.0 1 +3 6 3886.0 1 +3 7 4208.0 1 +3 8 4086.0 1 +3 9 3630.0 1 +4 NULL 52763.0 10 +4 0 4550.0 1 +4 1 5143.0 1 +4 2 4232.0 1 +4 3 4876.0 1 +4 4 5316.0 1 +4 5 2345.0 1 +4 6 4400.0 1 +4 7 4954.0 1 +4 8 7418.0 1 +4 9 9529.0 1 +5 NULL 397.0 6 +5 1 102.0 1 +5 3 53.0 1 +5 4 54.0 1 +5 5 15.0 1 +5 7 57.0 1 +5 8 116.0 1 +6 NULL 398.0 5 +6 4 64.0 1 +6 5 65.0 1 +6 6 66.0 1 +6 7 134.0 1 +6 9 69.0 1 +7 NULL 735.0 6 +7 0 210.0 1 +7 2 144.0 1 +7 4 74.0 1 +7 6 152.0 1 +7 7 77.0 1 +7 8 78.0 1 +8 NULL 762.0 8 +8 0 80.0 1 +8 2 82.0 1 +8 3 166.0 1 +8 4 168.0 1 +8 5 85.0 1 +8 6 86.0 1 +8 7 87.0 1 +8 8 8.0 1 +9 NULL 1047.0 7 +9 0 270.0 1 +9 2 92.0 1 +9 5 190.0 1 +9 6 96.0 1 +9 7 194.0 1 +9 8 196.0 1 +9 9 9.0 1 Index: ql/src/test/results/clientpositive/groupby1_map_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_map_withrollup.q.out (revision 0) @@ -0,0 +1,431 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) 
(TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-13-20_770_6688290238070205837/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-13-20_770_6688290238070205837/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 
+174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 
878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby_map_ppr_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_map_ppr_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_map_ppr_withrollup.q.out (revision 0) @@ -0,0 +1,285 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + 
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Needs Tagging: false + Path -> Alias: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [src] + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [src] + Path -> Partition: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + Partition + base file name: hr=11 + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060551 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060550 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types 
string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060551 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060550 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + 
compressed: false + GlobalTableId: 1 + directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-44_014_5747029729480381344/-ext-10000 + NumFilesPerFileSink: 1 + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-44_014_5747029729480381344/-ext-10000/ + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2 + columns.types string:int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060943 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-44_014_5747029729480381344/-ext-10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2 + columns.types string:int:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314060943 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + tmp directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-44_014_5747029729480381344/-ext-10001 + + Stage: Stage-2 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-22_17-55-44_014_5747029729480381344/-ext-10000/ + + +PREHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-55-51_760_5491896815690295574/-mr-10000 
+POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-55-51_760_5491896815690295574/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL +0 1 00.0 +1 71 132828.0 +2 69 251142.0 +3 62 364008.0 +4 74 4105526.0 +5 6 5794.0 +6 5 6796.0 +7 6 71470.0 +8 8 81524.0 +9 7 92094.0 Index: ql/src/test/results/clientpositive/groupby8_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby8_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby8_withrollup.q.out (revision 0) @@ -0,0 +1,1988 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP 
BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: substr(value, 5) + type: string + tag: -1 + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + 
aggregations: + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_531_4357255552981445404/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_531_4357255552981445404/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + 
outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_531_4357255552981445404/-mr-10006 + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: substr(value, 5) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_531_4357255552981445404/-mr-10007 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition 
columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-05_531_4357255552981445404/-mr-10008 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: 
Stage-9 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_541_249550882488427477/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_541_249550882488427477/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 
1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_693_3915346726829481741/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_693_3915346726829481741/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 
+273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: substr(value, 5) + type: string + tag: -1 + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_903_4761108159950029487/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_903_4761108159950029487/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: 
_col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_903_4761108159950029487/-mr-10006 + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: substr(value, 5) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_903_4761108159950029487/-mr-10007 + Reduce Output Operator + key 
expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-16-37_903_4761108159950029487/-mr-10008 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-9 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-17-08_827_8716001153902588834/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-17-08_827_8716001153902588834/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 
+255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-17-08_971_5815472221775742031/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-17-08_971_5815472221775742031/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 
1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 Index: ql/src/test/results/clientpositive/groupby2_map_multi_distinct_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_map_multi_distinct_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_map_multi_distinct_withrollup.q.out (revision 0) @@ -0,0 +1,170 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT 
substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + expr: sum(substr(value, 5)) + expr: sum(DISTINCT substr(value, 5)) + expr: count(value) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + expr: _col4 + type: double + expr: _col5 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + expr: 
sum(DISTINCT KEY._col1:1._col0) + expr: count(VALUE._col3) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + expr: _col3 + type: double + expr: _col4 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + expr: UDFToInteger(_col3) + type: int + expr: UDFToInteger(_col4) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-12_330_5873638002354852499/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-12_330_5873638002354852499/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL 79136 500 +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 Index: ql/src/test/results/clientpositive/groupby11_withrollup.q.out =================================================================== --- 
ql/src/test/results/clientpositive/groupby11_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby11_withrollup.q.out (revision 0) @@ -0,0 +1,1031 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest2(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest2 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value WITH ROLLUP +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value WITH ROLLUP +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1) (TOK_PARTSPEC (TOK_PARTVAL ds '111')))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) value)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL src) value))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2) (TOK_PARTSPEC (TOK_PARTVAL ds '111')))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) key)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Reduce Output Operator + key expressions: + expr: value + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: value + type: string + expr: key + type: string + tag: -1 + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(KEY._col1) + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-45_758_632109172880149525/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-45_758_632109172880149525/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + 
outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-45_758_632109172880149525/-mr-10006 + Reduce Output Operator + key expressions: + expr: substr(value, 5) + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(value, 5) + type: string + expr: key + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(KEY._col1) + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-45_758_632109172880149525/-mr-10007 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + 
value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-45_758_632109172880149525/-mr-10008 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-9 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value WITH ROLLUP +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1@ds=111 +PREHOOK: Output: default@dest2@ds=111 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value WITH ROLLUP +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1@ds=111 +POSTHOOK: Output: default@dest2@ds=111 +POSTHOOK: Lineage: dest1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1@ds=111 +PREHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-11-17_201_3714510288277248414/-mr-10000 +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1@ds=111 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-11-17_201_3714510288277248414/-mr-10000 +POSTHOOK: Lineage: dest1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 500 309 111 +val_0 3 1 111 +val_10 1 1 111 +val_100 2 1 111 +val_103 2 1 111 +val_104 2 1 111 +val_105 1 1 111 +val_11 1 1 111 +val_111 1 1 111 +val_113 2 1 111 +val_114 1 1 111 +val_116 1 1 111 +val_118 2 1 111 +val_119 3 1 111 +val_12 2 1 111 +val_120 2 1 111 +val_125 2 1 111 +val_126 1 1 111 +val_128 3 1 111 +val_129 2 1 111 +val_131 1 1 111 +val_133 1 1 111 +val_134 2 1 111 +val_136 1 1 111 +val_137 2 1 111 +val_138 4 1 111 +val_143 1 1 111 +val_145 1 1 111 +val_146 2 1 111 +val_149 2 1 111 +val_15 2 1 111 +val_150 1 1 111 +val_152 2 1 111 +val_153 1 1 111 +val_155 1 1 111 +val_156 1 1 111 +val_157 1 1 111 +val_158 1 1 111 +val_160 1 1 111 +val_162 1 1 111 +val_163 1 1 111 +val_164 2 1 111 +val_165 2 1 111 +val_166 1 1 111 +val_167 3 1 111 +val_168 1 1 111 +val_169 4 1 111 +val_17 1 1 111 +val_170 1 1 111 +val_172 2 1 111 +val_174 2 1 111 +val_175 2 1 111 
+val_176 2 1 111 +val_177 1 1 111 +val_178 1 1 111 +val_179 2 1 111 +val_18 2 1 111 +val_180 1 1 111 +val_181 1 1 111 +val_183 1 1 111 +val_186 1 1 111 +val_187 3 1 111 +val_189 1 1 111 +val_19 1 1 111 +val_190 1 1 111 +val_191 2 1 111 +val_192 1 1 111 +val_193 3 1 111 +val_194 1 1 111 +val_195 2 1 111 +val_196 1 1 111 +val_197 2 1 111 +val_199 3 1 111 +val_2 1 1 111 +val_20 1 1 111 +val_200 2 1 111 +val_201 1 1 111 +val_202 1 1 111 +val_203 2 1 111 +val_205 2 1 111 +val_207 2 1 111 +val_208 3 1 111 +val_209 2 1 111 +val_213 2 1 111 +val_214 1 1 111 +val_216 2 1 111 +val_217 2 1 111 +val_218 1 1 111 +val_219 2 1 111 +val_221 2 1 111 +val_222 1 1 111 +val_223 2 1 111 +val_224 2 1 111 +val_226 1 1 111 +val_228 1 1 111 +val_229 2 1 111 +val_230 5 1 111 +val_233 2 1 111 +val_235 1 1 111 +val_237 2 1 111 +val_238 2 1 111 +val_239 2 1 111 +val_24 2 1 111 +val_241 1 1 111 +val_242 2 1 111 +val_244 1 1 111 +val_247 1 1 111 +val_248 1 1 111 +val_249 1 1 111 +val_252 1 1 111 +val_255 2 1 111 +val_256 2 1 111 +val_257 1 1 111 +val_258 1 1 111 +val_26 2 1 111 +val_260 1 1 111 +val_262 1 1 111 +val_263 1 1 111 +val_265 2 1 111 +val_266 1 1 111 +val_27 1 1 111 +val_272 2 1 111 +val_273 3 1 111 +val_274 1 1 111 +val_275 1 1 111 +val_277 4 1 111 +val_278 2 1 111 +val_28 1 1 111 +val_280 2 1 111 +val_281 2 1 111 +val_282 2 1 111 +val_283 1 1 111 +val_284 1 1 111 +val_285 1 1 111 +val_286 1 1 111 +val_287 1 1 111 +val_288 2 1 111 +val_289 1 1 111 +val_291 1 1 111 +val_292 1 1 111 +val_296 1 1 111 +val_298 3 1 111 +val_30 1 1 111 +val_302 1 1 111 +val_305 1 1 111 +val_306 1 1 111 +val_307 2 1 111 +val_308 1 1 111 +val_309 2 1 111 +val_310 1 1 111 +val_311 3 1 111 +val_315 1 1 111 +val_316 3 1 111 +val_317 2 1 111 +val_318 3 1 111 +val_321 2 1 111 +val_322 2 1 111 +val_323 1 1 111 +val_325 2 1 111 +val_327 3 1 111 +val_33 1 1 111 +val_331 2 1 111 +val_332 1 1 111 +val_333 2 1 111 +val_335 1 1 111 +val_336 1 1 111 +val_338 1 1 111 +val_339 1 1 111 +val_34 1 1 111 +val_341 1 1 111 
+val_342 2 1 111 +val_344 2 1 111 +val_345 1 1 111 +val_348 5 1 111 +val_35 3 1 111 +val_351 1 1 111 +val_353 2 1 111 +val_356 1 1 111 +val_360 1 1 111 +val_362 1 1 111 +val_364 1 1 111 +val_365 1 1 111 +val_366 1 1 111 +val_367 2 1 111 +val_368 1 1 111 +val_369 3 1 111 +val_37 2 1 111 +val_373 1 1 111 +val_374 1 1 111 +val_375 1 1 111 +val_377 1 1 111 +val_378 1 1 111 +val_379 1 1 111 +val_382 2 1 111 +val_384 3 1 111 +val_386 1 1 111 +val_389 1 1 111 +val_392 1 1 111 +val_393 1 1 111 +val_394 1 1 111 +val_395 2 1 111 +val_396 3 1 111 +val_397 2 1 111 +val_399 2 1 111 +val_4 1 1 111 +val_400 1 1 111 +val_401 5 1 111 +val_402 1 1 111 +val_403 3 1 111 +val_404 2 1 111 +val_406 4 1 111 +val_407 1 1 111 +val_409 3 1 111 +val_41 1 1 111 +val_411 1 1 111 +val_413 2 1 111 +val_414 2 1 111 +val_417 3 1 111 +val_418 1 1 111 +val_419 1 1 111 +val_42 2 1 111 +val_421 1 1 111 +val_424 2 1 111 +val_427 1 1 111 +val_429 2 1 111 +val_43 1 1 111 +val_430 3 1 111 +val_431 3 1 111 +val_432 1 1 111 +val_435 1 1 111 +val_436 1 1 111 +val_437 1 1 111 +val_438 3 1 111 +val_439 2 1 111 +val_44 1 1 111 +val_443 1 1 111 +val_444 1 1 111 +val_446 1 1 111 +val_448 1 1 111 +val_449 1 1 111 +val_452 1 1 111 +val_453 1 1 111 +val_454 3 1 111 +val_455 1 1 111 +val_457 1 1 111 +val_458 2 1 111 +val_459 2 1 111 +val_460 1 1 111 +val_462 2 1 111 +val_463 2 1 111 +val_466 3 1 111 +val_467 1 1 111 +val_468 4 1 111 +val_469 5 1 111 +val_47 1 1 111 +val_470 1 1 111 +val_472 1 1 111 +val_475 1 1 111 +val_477 1 1 111 +val_478 2 1 111 +val_479 1 1 111 +val_480 3 1 111 +val_481 1 1 111 +val_482 1 1 111 +val_483 1 1 111 +val_484 1 1 111 +val_485 1 1 111 +val_487 1 1 111 +val_489 4 1 111 +val_490 1 1 111 +val_491 1 1 111 +val_492 2 1 111 +val_493 1 1 111 +val_494 1 1 111 +val_495 1 1 111 +val_496 1 1 111 +val_497 1 1 111 +val_498 3 1 111 +val_5 3 1 111 +val_51 2 1 111 +val_53 1 1 111 +val_54 1 1 111 +val_57 1 1 111 +val_58 2 1 111 +val_64 1 1 111 +val_65 1 1 111 +val_66 1 1 111 +val_67 2 1 111 +val_69 1 1 
111 +val_70 3 1 111 +val_72 2 1 111 +val_74 1 1 111 +val_76 2 1 111 +val_77 1 1 111 +val_78 1 1 111 +val_8 1 1 111 +val_80 1 1 111 +val_82 1 1 111 +val_83 2 1 111 +val_84 2 1 111 +val_85 1 1 111 +val_86 1 1 111 +val_87 1 1 111 +val_9 1 1 111 +val_90 3 1 111 +val_92 1 1 111 +val_95 2 1 111 +val_96 1 1 111 +val_97 2 1 111 +val_98 2 1 111 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2@ds=111 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-11-17_376_2515637164733146385/-mr-10000 +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2@ds=111 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-11-17_376_2515637164733146385/-mr-10000 +POSTHOOK: Lineage: dest1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 500 309 111 +0 3 1 111 +10 1 1 111 +100 2 1 111 +103 2 1 111 +104 2 1 111 +105 1 1 111 +11 1 1 111 +111 1 1 111 +113 2 1 111 +114 1 1 111 +116 1 1 111 +118 2 1 111 +119 3 1 111 +12 2 1 111 +120 2 1 111 +125 2 1 111 +126 1 1 111 +128 3 1 111 +129 2 1 111 +131 1 1 111 +133 1 1 111 +134 2 1 111 +136 1 1 111 +137 2 1 111 +138 4 1 111 +143 1 1 111 +145 1 1 111 +146 2 1 111 +149 2 1 111 +15 2 1 111 +150 1 1 111 +152 2 1 
111 +153 1 1 111 +155 1 1 111 +156 1 1 111 +157 1 1 111 +158 1 1 111 +160 1 1 111 +162 1 1 111 +163 1 1 111 +164 2 1 111 +165 2 1 111 +166 1 1 111 +167 3 1 111 +168 1 1 111 +169 4 1 111 +17 1 1 111 +170 1 1 111 +172 2 1 111 +174 2 1 111 +175 2 1 111 +176 2 1 111 +177 1 1 111 +178 1 1 111 +179 2 1 111 +18 2 1 111 +180 1 1 111 +181 1 1 111 +183 1 1 111 +186 1 1 111 +187 3 1 111 +189 1 1 111 +19 1 1 111 +190 1 1 111 +191 2 1 111 +192 1 1 111 +193 3 1 111 +194 1 1 111 +195 2 1 111 +196 1 1 111 +197 2 1 111 +199 3 1 111 +2 1 1 111 +20 1 1 111 +200 2 1 111 +201 1 1 111 +202 1 1 111 +203 2 1 111 +205 2 1 111 +207 2 1 111 +208 3 1 111 +209 2 1 111 +213 2 1 111 +214 1 1 111 +216 2 1 111 +217 2 1 111 +218 1 1 111 +219 2 1 111 +221 2 1 111 +222 1 1 111 +223 2 1 111 +224 2 1 111 +226 1 1 111 +228 1 1 111 +229 2 1 111 +230 5 1 111 +233 2 1 111 +235 1 1 111 +237 2 1 111 +238 2 1 111 +239 2 1 111 +24 2 1 111 +241 1 1 111 +242 2 1 111 +244 1 1 111 +247 1 1 111 +248 1 1 111 +249 1 1 111 +252 1 1 111 +255 2 1 111 +256 2 1 111 +257 1 1 111 +258 1 1 111 +26 2 1 111 +260 1 1 111 +262 1 1 111 +263 1 1 111 +265 2 1 111 +266 1 1 111 +27 1 1 111 +272 2 1 111 +273 3 1 111 +274 1 1 111 +275 1 1 111 +277 4 1 111 +278 2 1 111 +28 1 1 111 +280 2 1 111 +281 2 1 111 +282 2 1 111 +283 1 1 111 +284 1 1 111 +285 1 1 111 +286 1 1 111 +287 1 1 111 +288 2 1 111 +289 1 1 111 +291 1 1 111 +292 1 1 111 +296 1 1 111 +298 3 1 111 +30 1 1 111 +302 1 1 111 +305 1 1 111 +306 1 1 111 +307 2 1 111 +308 1 1 111 +309 2 1 111 +310 1 1 111 +311 3 1 111 +315 1 1 111 +316 3 1 111 +317 2 1 111 +318 3 1 111 +321 2 1 111 +322 2 1 111 +323 1 1 111 +325 2 1 111 +327 3 1 111 +33 1 1 111 +331 2 1 111 +332 1 1 111 +333 2 1 111 +335 1 1 111 +336 1 1 111 +338 1 1 111 +339 1 1 111 +34 1 1 111 +341 1 1 111 +342 2 1 111 +344 2 1 111 +345 1 1 111 +348 5 1 111 +35 3 1 111 +351 1 1 111 +353 2 1 111 +356 1 1 111 +360 1 1 111 +362 1 1 111 +364 1 1 111 +365 1 1 111 +366 1 1 111 +367 2 1 111 +368 1 1 111 +369 3 1 111 +37 2 1 111 +373 1 1 
111 +374 1 1 111 +375 1 1 111 +377 1 1 111 +378 1 1 111 +379 1 1 111 +382 2 1 111 +384 3 1 111 +386 1 1 111 +389 1 1 111 +392 1 1 111 +393 1 1 111 +394 1 1 111 +395 2 1 111 +396 3 1 111 +397 2 1 111 +399 2 1 111 +4 1 1 111 +400 1 1 111 +401 5 1 111 +402 1 1 111 +403 3 1 111 +404 2 1 111 +406 4 1 111 +407 1 1 111 +409 3 1 111 +41 1 1 111 +411 1 1 111 +413 2 1 111 +414 2 1 111 +417 3 1 111 +418 1 1 111 +419 1 1 111 +42 2 1 111 +421 1 1 111 +424 2 1 111 +427 1 1 111 +429 2 1 111 +43 1 1 111 +430 3 1 111 +431 3 1 111 +432 1 1 111 +435 1 1 111 +436 1 1 111 +437 1 1 111 +438 3 1 111 +439 2 1 111 +44 1 1 111 +443 1 1 111 +444 1 1 111 +446 1 1 111 +448 1 1 111 +449 1 1 111 +452 1 1 111 +453 1 1 111 +454 3 1 111 +455 1 1 111 +457 1 1 111 +458 2 1 111 +459 2 1 111 +460 1 1 111 +462 2 1 111 +463 2 1 111 +466 3 1 111 +467 1 1 111 +468 4 1 111 +469 5 1 111 +47 1 1 111 +470 1 1 111 +472 1 1 111 +475 1 1 111 +477 1 1 111 +478 2 1 111 +479 1 1 111 +480 3 1 111 +481 1 1 111 +482 1 1 111 +483 1 1 111 +484 1 1 111 +485 1 1 111 +487 1 1 111 +489 4 1 111 +490 1 1 111 +491 1 1 111 +492 2 1 111 +493 1 1 111 +494 1 1 111 +495 1 1 111 +496 1 1 111 +497 1 1 111 +498 3 1 111 +5 3 1 111 +51 2 1 111 +53 1 1 111 +54 1 1 111 +57 1 1 111 +58 2 1 111 +64 1 1 111 +65 1 1 111 +66 1 1 111 +67 2 1 111 +69 1 1 111 +70 3 1 111 +72 2 1 111 +74 1 1 111 +76 2 1 111 +77 1 1 111 +78 1 1 111 +8 1 1 111 +80 1 1 111 +82 1 1 111 +83 2 1 111 +84 2 1 111 +85 1 1 111 +86 1 1 111 +87 1 1 111 +9 1 1 111 +90 3 1 111 +92 1 1 111 +95 2 1 111 +96 1 1 111 +97 2 1 111 +98 2 1 111 Index: ql/src/test/results/clientpositive/groupby_ppr_multi_distinct_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct_withrollup.q.out (revision 0) @@ -0,0 +1,364 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) 
STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + GatherStats: false + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + expr: value + type: string + sort order: +++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + tag: -1 + Needs Tagging: false + Path -> Alias: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [src] + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 [src] + Path -> Partition: + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295116 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295115 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=12 + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295117 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat 
org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/srcpart + name default.srcpart + partition_columns ds/hr + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295115 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + expr: sum(KEY._col1) + expr: sum(DISTINCT KEY._col1) + expr: count(DISTINCT KEY._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + expr: KEY._col2 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + File Output Operator + compressed: false + GlobalTableId: 0 + directory: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-42_584_1577958987845121185/-mr-10002 + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string,string,string,bigint,double,double,bigint + escape.delim \ + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-42_584_1577958987845121185/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col3 + type: 
bigint + expr: _col4 + type: double + expr: _col5 + type: double + expr: _col6 + type: bigint + Needs Tagging: false + Path -> Alias: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-42_584_1577958987845121185/-mr-10002 [file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-42_584_1577958987845121185/-mr-10002] + Path -> Partition: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-42_584_1577958987845121185/-mr-10002 + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string,string,string,bigint,double,double,bigint + escape.delim \ + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types string,string,string,bigint,double,double,bigint + escape.delim \ + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + expr: sum(DISTINCT KEY._col1:1._col0) + expr: count(DISTINCT KEY._col1:2._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + expr: _col3 + type: double + expr: _col4 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + expr: UDFToInteger(_col3) + type: int + expr: UDFToInteger(_col4) + type: int + outputColumnNames: _col0, 
_col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_10-58-42_584_1577958987845121185/-ext-10000 + NumFilesPerFileSink: 1 + Stats Publishing Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_10-58-42_584_1577958987845121185/-ext-10000/ + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2,c3,c4 + columns.types string:int:string:int:int + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295122 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_10-58-42_584_1577958987845121185/-ext-10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2,c3,c4 + columns.types string:int:string:int:int + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/test/data/warehouse/dest1 + name default.dest1 + serialization.ddl struct dest1 { 
string key, i32 c1, string c2, i32 c3, i32 c4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1314295122 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + tmp directory: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_10-58-42_584_1577958987845121185/-ext-10001 + + Stage: Stage-3 + Stats-Aggr Operator + Stats Aggregation Key Prefix: pfile:/Users/kevinwilfong/Documents/hive_rollup/build/ql/scratchdir/hive_2011-08-25_10-58-42_584_1577958987845121185/-ext-10000/ + + +PREHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 
EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-58_187_6735659875160191874/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_10-58-58_187_6735659875160191874/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL 79136 309 +0 1 00.0 0 1 +1 71 132828.0 10044 71 +2 69 251142.0 15780 69 +3 62 364008.0 20119 62 +4 74 4105526.0 30965 74 +5 6 5794.0 278 6 +6 5 6796.0 331 5 +7 6 71470.0 447 6 +8 8 81524.0 595 8 +9 7 92094.0 577 7 Index: ql/src/test/results/clientpositive/groupby7_noskew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby7_noskew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby7_noskew_withrollup.q.out (revision 0) @@ -0,0 +1,909 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE 
DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-10_383_1766112361533795325/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: 
sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-10_383_1766112361533795325/-mr-10005 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-10_383_1766112361533795325/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-33_139_7346303037028506871/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-33_139_7346303037028506871/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 
348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 
44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-33_284_7362366093867161455/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-39-33_284_7362366093867161455/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 
+146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 
+409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby_neg_float_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby_neg_float_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby_neg_float_withrollup.q.out (revision 0) @@ -0,0 +1,36 @@ +PREHOOK: query: FROM src +SELECT cast('-30.33' as DOUBLE) +GROUP BY cast('-30.33' as DOUBLE) +WITH ROLLUP +LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-19-10_091_183249028163579532/-mr-10000 +POSTHOOK: query: FROM src +SELECT cast('-30.33' as DOUBLE) +GROUP BY cast('-30.33' as DOUBLE) +WITH ROLLUP +LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-19-10_091_183249028163579532/-mr-10000 +NULL +-30.33 
+PREHOOK: query: FROM src +SELECT '-30.33' +GROUP BY '-30.33' +WITH ROLLUP +LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-19-15_461_8788670220771176115/-mr-10000 +POSTHOOK: query: FROM src +SELECT '-30.33' +GROUP BY '-30.33' +WITH ROLLUP +LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-19-15_461_8788670220771176115/-mr-10000 +NULL +-30.33 Index: ql/src/test/results/clientpositive/groupby1_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_withrollup.q.out (revision 0) @@ -0,0 +1,456 @@ +PREHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_g1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL src) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partial1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-58-28_537_847822798365851953/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g1 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g1 +POSTHOOK: Lineage: dest_g1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g1.* FROM dest_g1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-58-42_194_8357586835448463188/-mr-10000 +POSTHOOK: query: SELECT dest_g1.* FROM dest_g1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-58-42_194_8357586835448463188/-mr-10000 +POSTHOOK: Lineage: dest_g1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 
226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 
764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby1_map_nomap_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map_nomap_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_map_nomap_withrollup.q.out (revision 0) @@ -0,0 +1,431 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN 
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + 
replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-13-01_092_4412052902028054982/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-13-01_092_4412052902028054982/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 
+156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 
+419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby8_map_skew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby8_map_skew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby8_map_skew_withrollup.q.out (revision 0) @@ -0,0 +1,939 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT 
SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + 
Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-07_804_7929870772700707701/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-07_804_7929870772700707701/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-07_804_7929870772700707701/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + 
type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-29_207_4657586685269007896/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-29_207_4657586685269007896/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 
+322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-29_353_2642375057452674167/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-29_353_2642375057452674167/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 
+11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 
1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 Index: ql/src/test/results/clientpositive/groupby2_noskew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_noskew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_noskew_withrollup.q.out (revision 0) @@ -0,0 +1,174 @@ +PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + expr: sum(KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-09_173_7898396798878517993/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: 
_col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g2.* FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +PREHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-20_977_5948781011137660343/-mr-10000 +POSTHOOK: query: SELECT dest_g2.* FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-20_977_5948781011137660343/-mr-10000 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 Index: ql/src/test/results/clientpositive/groupby7_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby7_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby7_withrollup.q.out (revision 0) @@ -0,0 +1,672 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src 
+PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-48-41_319_3506335105595686450/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-48-41_319_3506335105595686450/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 
+134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 
2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-48-41_524_1328849410581176585/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_16-48-41_524_1328849410581176585/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 
366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby2_map_skew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_map_skew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_map_skew_withrollup.q.out (revision 0) @@ -0,0 +1,189 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT 
OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Reduce Operator 
Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-12_754_2896475339771389366/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-25_126_2121312564414882330/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-25_126_2121312564414882330/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 
+5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 Index: ql/src/test/results/clientpositive/groupby2_limit_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_limit_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_limit_withrollup.q.out (revision 0) @@ -0,0 +1,91 @@ +PREHOOK: query: EXPLAIN +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key WITH ROLLUP LIMIT 5 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key WITH ROLLUP LIMIT 5 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL src) key)) (TOK_LIMIT 5))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + 
outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 5 + + +PREHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key WITH ROLLUP LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-13-41_168_8968150533203725215/-mr-10000 +POSTHOOK: query: SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key WITH ROLLUP LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-13-41_168_8968150533203725215/-mr-10000 +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 Index: ql/src/test/results/clientpositive/groupby9_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby9_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby9_withrollup.q.out (revision 0) @@ -0,0 +1,6107 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT 
SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. 
(TOK_TABLE_OR_COL SRC) value)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: 
bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-17-40_946_1984661858950312374/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-17-55_303_3708667332622893516/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-17-55_303_3708667332622893516/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 
1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-17-55_447_3675073597682644100/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-17-55_447_3675073597682644100/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 309 +0 NULL 1 +0 val_0 1 +10 NULL 1 +10 val_10 1 +100 NULL 1 +100 val_100 1 +103 NULL 1 +103 val_103 1 +104 NULL 1 +104 val_104 1 +105 NULL 1 +105 val_105 1 
+11 NULL 1 +11 val_11 1 +111 NULL 1 +111 val_111 1 +113 NULL 1 +113 val_113 1 +114 NULL 1 +114 val_114 1 +116 NULL 1 +116 val_116 1 +118 NULL 1 +118 val_118 1 +119 NULL 1 +119 val_119 1 +12 NULL 1 +12 val_12 1 +120 NULL 1 +120 val_120 1 +125 NULL 1 +125 val_125 1 +126 NULL 1 +126 val_126 1 +128 NULL 1 +128 val_128 1 +129 NULL 1 +129 val_129 1 +131 NULL 1 +131 val_131 1 +133 NULL 1 +133 val_133 1 +134 NULL 1 +134 val_134 1 +136 NULL 1 +136 val_136 1 +137 NULL 1 +137 val_137 1 +138 NULL 1 +138 val_138 1 +143 NULL 1 +143 val_143 1 +145 NULL 1 +145 val_145 1 +146 NULL 1 +146 val_146 1 +149 NULL 1 +149 val_149 1 +15 NULL 1 +15 val_15 1 +150 NULL 1 +150 val_150 1 +152 NULL 1 +152 val_152 1 +153 NULL 1 +153 val_153 1 +155 NULL 1 +155 val_155 1 +156 NULL 1 +156 val_156 1 +157 NULL 1 +157 val_157 1 +158 NULL 1 +158 val_158 1 +160 NULL 1 +160 val_160 1 +162 NULL 1 +162 val_162 1 +163 NULL 1 +163 val_163 1 +164 NULL 1 +164 val_164 1 +165 NULL 1 +165 val_165 1 +166 NULL 1 +166 val_166 1 +167 NULL 1 +167 val_167 1 +168 NULL 1 +168 val_168 1 +169 NULL 1 +169 val_169 1 +17 NULL 1 +17 val_17 1 +170 NULL 1 +170 val_170 1 +172 NULL 1 +172 val_172 1 +174 NULL 1 +174 val_174 1 +175 NULL 1 +175 val_175 1 +176 NULL 1 +176 val_176 1 +177 NULL 1 +177 val_177 1 +178 NULL 1 +178 val_178 1 +179 NULL 1 +179 val_179 1 +18 NULL 1 +18 val_18 1 +180 NULL 1 +180 val_180 1 +181 NULL 1 +181 val_181 1 +183 NULL 1 +183 val_183 1 +186 NULL 1 +186 val_186 1 +187 NULL 1 +187 val_187 1 +189 NULL 1 +189 val_189 1 +19 NULL 1 +19 val_19 1 +190 NULL 1 +190 val_190 1 +191 NULL 1 +191 val_191 1 +192 NULL 1 +192 val_192 1 +193 NULL 1 +193 val_193 1 +194 NULL 1 +194 val_194 1 +195 NULL 1 +195 val_195 1 +196 NULL 1 +196 val_196 1 +197 NULL 1 +197 val_197 1 +199 NULL 1 +199 val_199 1 +2 NULL 1 +2 val_2 1 +20 NULL 1 +20 val_20 1 +200 NULL 1 +200 val_200 1 +201 NULL 1 +201 val_201 1 +202 NULL 1 +202 val_202 1 +203 NULL 1 +203 val_203 1 +205 NULL 1 +205 val_205 1 +207 NULL 1 +207 val_207 1 +208 NULL 1 +208 val_208 1 
+209 NULL 1 +209 val_209 1 +213 NULL 1 +213 val_213 1 +214 NULL 1 +214 val_214 1 +216 NULL 1 +216 val_216 1 +217 NULL 1 +217 val_217 1 +218 NULL 1 +218 val_218 1 +219 NULL 1 +219 val_219 1 +221 NULL 1 +221 val_221 1 +222 NULL 1 +222 val_222 1 +223 NULL 1 +223 val_223 1 +224 NULL 1 +224 val_224 1 +226 NULL 1 +226 val_226 1 +228 NULL 1 +228 val_228 1 +229 NULL 1 +229 val_229 1 +230 NULL 1 +230 val_230 1 +233 NULL 1 +233 val_233 1 +235 NULL 1 +235 val_235 1 +237 NULL 1 +237 val_237 1 +238 NULL 1 +238 val_238 1 +239 NULL 1 +239 val_239 1 +24 NULL 1 +24 val_24 1 +241 NULL 1 +241 val_241 1 +242 NULL 1 +242 val_242 1 +244 NULL 1 +244 val_244 1 +247 NULL 1 +247 val_247 1 +248 NULL 1 +248 val_248 1 +249 NULL 1 +249 val_249 1 +252 NULL 1 +252 val_252 1 +255 NULL 1 +255 val_255 1 +256 NULL 1 +256 val_256 1 +257 NULL 1 +257 val_257 1 +258 NULL 1 +258 val_258 1 +26 NULL 1 +26 val_26 1 +260 NULL 1 +260 val_260 1 +262 NULL 1 +262 val_262 1 +263 NULL 1 +263 val_263 1 +265 NULL 1 +265 val_265 1 +266 NULL 1 +266 val_266 1 +27 NULL 1 +27 val_27 1 +272 NULL 1 +272 val_272 1 +273 NULL 1 +273 val_273 1 +274 NULL 1 +274 val_274 1 +275 NULL 1 +275 val_275 1 +277 NULL 1 +277 val_277 1 +278 NULL 1 +278 val_278 1 +28 NULL 1 +28 val_28 1 +280 NULL 1 +280 val_280 1 +281 NULL 1 +281 val_281 1 +282 NULL 1 +282 val_282 1 +283 NULL 1 +283 val_283 1 +284 NULL 1 +284 val_284 1 +285 NULL 1 +285 val_285 1 +286 NULL 1 +286 val_286 1 +287 NULL 1 +287 val_287 1 +288 NULL 1 +288 val_288 1 +289 NULL 1 +289 val_289 1 +291 NULL 1 +291 val_291 1 +292 NULL 1 +292 val_292 1 +296 NULL 1 +296 val_296 1 +298 NULL 1 +298 val_298 1 +30 NULL 1 +30 val_30 1 +302 NULL 1 +302 val_302 1 +305 NULL 1 +305 val_305 1 +306 NULL 1 +306 val_306 1 +307 NULL 1 +307 val_307 1 +308 NULL 1 +308 val_308 1 +309 NULL 1 +309 val_309 1 +310 NULL 1 +310 val_310 1 +311 NULL 1 +311 val_311 1 +315 NULL 1 +315 val_315 1 +316 NULL 1 +316 val_316 1 +317 NULL 1 +317 val_317 1 +318 NULL 1 +318 val_318 1 +321 NULL 1 +321 val_321 1 +322 NULL 1 +322 
val_322 1 +323 NULL 1 +323 val_323 1 +325 NULL 1 +325 val_325 1 +327 NULL 1 +327 val_327 1 +33 NULL 1 +33 val_33 1 +331 NULL 1 +331 val_331 1 +332 NULL 1 +332 val_332 1 +333 NULL 1 +333 val_333 1 +335 NULL 1 +335 val_335 1 +336 NULL 1 +336 val_336 1 +338 NULL 1 +338 val_338 1 +339 NULL 1 +339 val_339 1 +34 NULL 1 +34 val_34 1 +341 NULL 1 +341 val_341 1 +342 NULL 1 +342 val_342 1 +344 NULL 1 +344 val_344 1 +345 NULL 1 +345 val_345 1 +348 NULL 1 +348 val_348 1 +35 NULL 1 +35 val_35 1 +351 NULL 1 +351 val_351 1 +353 NULL 1 +353 val_353 1 +356 NULL 1 +356 val_356 1 +360 NULL 1 +360 val_360 1 +362 NULL 1 +362 val_362 1 +364 NULL 1 +364 val_364 1 +365 NULL 1 +365 val_365 1 +366 NULL 1 +366 val_366 1 +367 NULL 1 +367 val_367 1 +368 NULL 1 +368 val_368 1 +369 NULL 1 +369 val_369 1 +37 NULL 1 +37 val_37 1 +373 NULL 1 +373 val_373 1 +374 NULL 1 +374 val_374 1 +375 NULL 1 +375 val_375 1 +377 NULL 1 +377 val_377 1 +378 NULL 1 +378 val_378 1 +379 NULL 1 +379 val_379 1 +382 NULL 1 +382 val_382 1 +384 NULL 1 +384 val_384 1 +386 NULL 1 +386 val_386 1 +389 NULL 1 +389 val_389 1 +392 NULL 1 +392 val_392 1 +393 NULL 1 +393 val_393 1 +394 NULL 1 +394 val_394 1 +395 NULL 1 +395 val_395 1 +396 NULL 1 +396 val_396 1 +397 NULL 1 +397 val_397 1 +399 NULL 1 +399 val_399 1 +4 NULL 1 +4 val_4 1 +400 NULL 1 +400 val_400 1 +401 NULL 1 +401 val_401 1 +402 NULL 1 +402 val_402 1 +403 NULL 1 +403 val_403 1 +404 NULL 1 +404 val_404 1 +406 NULL 1 +406 val_406 1 +407 NULL 1 +407 val_407 1 +409 NULL 1 +409 val_409 1 +41 NULL 1 +41 val_41 1 +411 NULL 1 +411 val_411 1 +413 NULL 1 +413 val_413 1 +414 NULL 1 +414 val_414 1 +417 NULL 1 +417 val_417 1 +418 NULL 1 +418 val_418 1 +419 NULL 1 +419 val_419 1 +42 NULL 1 +42 val_42 1 +421 NULL 1 +421 val_421 1 +424 NULL 1 +424 val_424 1 +427 NULL 1 +427 val_427 1 +429 NULL 1 +429 val_429 1 +43 NULL 1 +43 val_43 1 +430 NULL 1 +430 val_430 1 +431 NULL 1 +431 val_431 1 +432 NULL 1 +432 val_432 1 +435 NULL 1 +435 val_435 1 +436 NULL 1 +436 val_436 1 +437 NULL 1 +437 
val_437 1 +438 NULL 1 +438 val_438 1 +439 NULL 1 +439 val_439 1 +44 NULL 1 +44 val_44 1 +443 NULL 1 +443 val_443 1 +444 NULL 1 +444 val_444 1 +446 NULL 1 +446 val_446 1 +448 NULL 1 +448 val_448 1 +449 NULL 1 +449 val_449 1 +452 NULL 1 +452 val_452 1 +453 NULL 1 +453 val_453 1 +454 NULL 1 +454 val_454 1 +455 NULL 1 +455 val_455 1 +457 NULL 1 +457 val_457 1 +458 NULL 1 +458 val_458 1 +459 NULL 1 +459 val_459 1 +460 NULL 1 +460 val_460 1 +462 NULL 1 +462 val_462 1 +463 NULL 1 +463 val_463 1 +466 NULL 1 +466 val_466 1 +467 NULL 1 +467 val_467 1 +468 NULL 1 +468 val_468 1 +469 NULL 1 +469 val_469 1 +47 NULL 1 +47 val_47 1 +470 NULL 1 +470 val_470 1 +472 NULL 1 +472 val_472 1 +475 NULL 1 +475 val_475 1 +477 NULL 1 +477 val_477 1 +478 NULL 1 +478 val_478 1 +479 NULL 1 +479 val_479 1 +480 NULL 1 +480 val_480 1 +481 NULL 1 +481 val_481 1 +482 NULL 1 +482 val_482 1 +483 NULL 1 +483 val_483 1 +484 NULL 1 +484 val_484 1 +485 NULL 1 +485 val_485 1 +487 NULL 1 +487 val_487 1 +489 NULL 1 +489 val_489 1 +490 NULL 1 +490 val_490 1 +491 NULL 1 +491 val_491 1 +492 NULL 1 +492 val_492 1 +493 NULL 1 +493 val_493 1 +494 NULL 1 +494 val_494 1 +495 NULL 1 +495 val_495 1 +496 NULL 1 +496 val_496 1 +497 NULL 1 +497 val_497 1 +498 NULL 1 +498 val_498 1 +5 NULL 1 +5 val_5 1 +51 NULL 1 +51 val_51 1 +53 NULL 1 +53 val_53 1 +54 NULL 1 +54 val_54 1 +57 NULL 1 +57 val_57 1 +58 NULL 1 +58 val_58 1 +64 NULL 1 +64 val_64 1 +65 NULL 1 +65 val_65 1 +66 NULL 1 +66 val_66 1 +67 NULL 1 +67 val_67 1 +69 NULL 1 +69 val_69 1 +70 NULL 1 +70 val_70 1 +72 NULL 1 +72 val_72 1 +74 NULL 1 +74 val_74 1 +76 NULL 1 +76 val_76 1 +77 NULL 1 +77 val_77 1 +78 NULL 1 +78 val_78 1 +8 NULL 1 +8 val_8 1 +80 NULL 1 +80 val_80 1 +82 NULL 1 +82 val_82 1 +83 NULL 1 +83 val_83 1 +84 NULL 1 +84 val_84 1 +85 NULL 1 +85 val_85 1 +86 NULL 1 +86 val_86 1 +87 NULL 1 +87 val_87 1 +9 NULL 1 +9 val_9 1 +90 NULL 1 +90 val_90 1 +92 NULL 1 +92 val_92 1 +95 NULL 1 +95 val_95 1 +96 NULL 1 +96 val_96 1 +97 NULL 1 +97 val_97 1 +98 NULL 1 +98 
val_98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: value + type: string + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: 
bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-17-55_604_7583005985111378159/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col0 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-09_203_3633311268010652915/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-09_203_3633311268010652915/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 
+155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-09_357_5907306815127394773/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-09_357_5907306815127394773/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 309 +NULL val_0 1 +0 val_0 1 +NULL val_10 1 +10 val_10 1 +NULL val_100 1 +100 val_100 1 +NULL val_103 1 +103 val_103 1 +NULL val_104 1 +104 val_104 1 +NULL val_105 1 +105 val_105 1 +NULL val_11 1 +11 val_11 1 +NULL val_111 1 +111 val_111 1 +NULL val_113 1 +113 val_113 1 +NULL val_114 1 +114 val_114 1 +NULL val_116 1 +116 val_116 1 +NULL val_118 1 +118 val_118 1 +NULL val_119 1 +119 val_119 1 +NULL val_12 1 +12 val_12 1 +NULL val_120 1 +120 val_120 1 +NULL val_125 1 +125 val_125 1 +NULL val_126 1 +126 val_126 1 +NULL val_128 1 +128 val_128 
1 +NULL val_129 1 +129 val_129 1 +NULL val_131 1 +131 val_131 1 +NULL val_133 1 +133 val_133 1 +NULL val_134 1 +134 val_134 1 +NULL val_136 1 +136 val_136 1 +NULL val_137 1 +137 val_137 1 +NULL val_138 1 +138 val_138 1 +NULL val_143 1 +143 val_143 1 +NULL val_145 1 +145 val_145 1 +NULL val_146 1 +146 val_146 1 +NULL val_149 1 +149 val_149 1 +NULL val_15 1 +15 val_15 1 +NULL val_150 1 +150 val_150 1 +NULL val_152 1 +152 val_152 1 +NULL val_153 1 +153 val_153 1 +NULL val_155 1 +155 val_155 1 +NULL val_156 1 +156 val_156 1 +NULL val_157 1 +157 val_157 1 +NULL val_158 1 +158 val_158 1 +NULL val_160 1 +160 val_160 1 +NULL val_162 1 +162 val_162 1 +NULL val_163 1 +163 val_163 1 +NULL val_164 1 +164 val_164 1 +NULL val_165 1 +165 val_165 1 +NULL val_166 1 +166 val_166 1 +NULL val_167 1 +167 val_167 1 +NULL val_168 1 +168 val_168 1 +NULL val_169 1 +169 val_169 1 +NULL val_17 1 +17 val_17 1 +NULL val_170 1 +170 val_170 1 +NULL val_172 1 +172 val_172 1 +NULL val_174 1 +174 val_174 1 +NULL val_175 1 +175 val_175 1 +NULL val_176 1 +176 val_176 1 +NULL val_177 1 +177 val_177 1 +NULL val_178 1 +178 val_178 1 +NULL val_179 1 +179 val_179 1 +NULL val_18 1 +18 val_18 1 +NULL val_180 1 +180 val_180 1 +NULL val_181 1 +181 val_181 1 +NULL val_183 1 +183 val_183 1 +NULL val_186 1 +186 val_186 1 +NULL val_187 1 +187 val_187 1 +NULL val_189 1 +189 val_189 1 +NULL val_19 1 +19 val_19 1 +NULL val_190 1 +190 val_190 1 +NULL val_191 1 +191 val_191 1 +NULL val_192 1 +192 val_192 1 +NULL val_193 1 +193 val_193 1 +NULL val_194 1 +194 val_194 1 +NULL val_195 1 +195 val_195 1 +NULL val_196 1 +196 val_196 1 +NULL val_197 1 +197 val_197 1 +NULL val_199 1 +199 val_199 1 +NULL val_2 1 +2 val_2 1 +NULL val_20 1 +20 val_20 1 +NULL val_200 1 +200 val_200 1 +NULL val_201 1 +201 val_201 1 +NULL val_202 1 +202 val_202 1 +NULL val_203 1 +203 val_203 1 +NULL val_205 1 +205 val_205 1 +NULL val_207 1 +207 val_207 1 +NULL val_208 1 +208 val_208 1 +NULL val_209 1 +209 val_209 1 +NULL val_213 1 +213 val_213 1 
+NULL val_214 1 +214 val_214 1 +NULL val_216 1 +216 val_216 1 +NULL val_217 1 +217 val_217 1 +NULL val_218 1 +218 val_218 1 +NULL val_219 1 +219 val_219 1 +NULL val_221 1 +221 val_221 1 +NULL val_222 1 +222 val_222 1 +NULL val_223 1 +223 val_223 1 +NULL val_224 1 +224 val_224 1 +NULL val_226 1 +226 val_226 1 +NULL val_228 1 +228 val_228 1 +NULL val_229 1 +229 val_229 1 +NULL val_230 1 +230 val_230 1 +NULL val_233 1 +233 val_233 1 +NULL val_235 1 +235 val_235 1 +NULL val_237 1 +237 val_237 1 +NULL val_238 1 +238 val_238 1 +NULL val_239 1 +239 val_239 1 +NULL val_24 1 +24 val_24 1 +NULL val_241 1 +241 val_241 1 +NULL val_242 1 +242 val_242 1 +NULL val_244 1 +244 val_244 1 +NULL val_247 1 +247 val_247 1 +NULL val_248 1 +248 val_248 1 +NULL val_249 1 +249 val_249 1 +NULL val_252 1 +252 val_252 1 +NULL val_255 1 +255 val_255 1 +NULL val_256 1 +256 val_256 1 +NULL val_257 1 +257 val_257 1 +NULL val_258 1 +258 val_258 1 +NULL val_26 1 +26 val_26 1 +NULL val_260 1 +260 val_260 1 +NULL val_262 1 +262 val_262 1 +NULL val_263 1 +263 val_263 1 +NULL val_265 1 +265 val_265 1 +NULL val_266 1 +266 val_266 1 +NULL val_27 1 +27 val_27 1 +NULL val_272 1 +272 val_272 1 +NULL val_273 1 +273 val_273 1 +NULL val_274 1 +274 val_274 1 +NULL val_275 1 +275 val_275 1 +NULL val_277 1 +277 val_277 1 +NULL val_278 1 +278 val_278 1 +NULL val_28 1 +28 val_28 1 +NULL val_280 1 +280 val_280 1 +NULL val_281 1 +281 val_281 1 +NULL val_282 1 +282 val_282 1 +NULL val_283 1 +283 val_283 1 +NULL val_284 1 +284 val_284 1 +NULL val_285 1 +285 val_285 1 +NULL val_286 1 +286 val_286 1 +NULL val_287 1 +287 val_287 1 +NULL val_288 1 +288 val_288 1 +NULL val_289 1 +289 val_289 1 +NULL val_291 1 +291 val_291 1 +NULL val_292 1 +292 val_292 1 +NULL val_296 1 +296 val_296 1 +NULL val_298 1 +298 val_298 1 +NULL val_30 1 +30 val_30 1 +NULL val_302 1 +302 val_302 1 +NULL val_305 1 +305 val_305 1 +NULL val_306 1 +306 val_306 1 +NULL val_307 1 +307 val_307 1 +NULL val_308 1 +308 val_308 1 +NULL val_309 1 +309 val_309 1 
+NULL val_310 1 +310 val_310 1 +NULL val_311 1 +311 val_311 1 +NULL val_315 1 +315 val_315 1 +NULL val_316 1 +316 val_316 1 +NULL val_317 1 +317 val_317 1 +NULL val_318 1 +318 val_318 1 +NULL val_321 1 +321 val_321 1 +NULL val_322 1 +322 val_322 1 +NULL val_323 1 +323 val_323 1 +NULL val_325 1 +325 val_325 1 +NULL val_327 1 +327 val_327 1 +NULL val_33 1 +33 val_33 1 +NULL val_331 1 +331 val_331 1 +NULL val_332 1 +332 val_332 1 +NULL val_333 1 +333 val_333 1 +NULL val_335 1 +335 val_335 1 +NULL val_336 1 +336 val_336 1 +NULL val_338 1 +338 val_338 1 +NULL val_339 1 +339 val_339 1 +NULL val_34 1 +34 val_34 1 +NULL val_341 1 +341 val_341 1 +NULL val_342 1 +342 val_342 1 +NULL val_344 1 +344 val_344 1 +NULL val_345 1 +345 val_345 1 +NULL val_348 1 +348 val_348 1 +NULL val_35 1 +35 val_35 1 +NULL val_351 1 +351 val_351 1 +NULL val_353 1 +353 val_353 1 +NULL val_356 1 +356 val_356 1 +NULL val_360 1 +360 val_360 1 +NULL val_362 1 +362 val_362 1 +NULL val_364 1 +364 val_364 1 +NULL val_365 1 +365 val_365 1 +NULL val_366 1 +366 val_366 1 +NULL val_367 1 +367 val_367 1 +NULL val_368 1 +368 val_368 1 +NULL val_369 1 +369 val_369 1 +NULL val_37 1 +37 val_37 1 +NULL val_373 1 +373 val_373 1 +NULL val_374 1 +374 val_374 1 +NULL val_375 1 +375 val_375 1 +NULL val_377 1 +377 val_377 1 +NULL val_378 1 +378 val_378 1 +NULL val_379 1 +379 val_379 1 +NULL val_382 1 +382 val_382 1 +NULL val_384 1 +384 val_384 1 +NULL val_386 1 +386 val_386 1 +NULL val_389 1 +389 val_389 1 +NULL val_392 1 +392 val_392 1 +NULL val_393 1 +393 val_393 1 +NULL val_394 1 +394 val_394 1 +NULL val_395 1 +395 val_395 1 +NULL val_396 1 +396 val_396 1 +NULL val_397 1 +397 val_397 1 +NULL val_399 1 +399 val_399 1 +NULL val_4 1 +4 val_4 1 +NULL val_400 1 +400 val_400 1 +NULL val_401 1 +401 val_401 1 +NULL val_402 1 +402 val_402 1 +NULL val_403 1 +403 val_403 1 +NULL val_404 1 +404 val_404 1 +NULL val_406 1 +406 val_406 1 +NULL val_407 1 +407 val_407 1 +NULL val_409 1 +409 val_409 1 +NULL val_41 1 +41 val_41 1 +NULL 
val_411 1 +411 val_411 1 +NULL val_413 1 +413 val_413 1 +NULL val_414 1 +414 val_414 1 +NULL val_417 1 +417 val_417 1 +NULL val_418 1 +418 val_418 1 +NULL val_419 1 +419 val_419 1 +NULL val_42 1 +42 val_42 1 +NULL val_421 1 +421 val_421 1 +NULL val_424 1 +424 val_424 1 +NULL val_427 1 +427 val_427 1 +NULL val_429 1 +429 val_429 1 +NULL val_43 1 +43 val_43 1 +NULL val_430 1 +430 val_430 1 +NULL val_431 1 +431 val_431 1 +NULL val_432 1 +432 val_432 1 +NULL val_435 1 +435 val_435 1 +NULL val_436 1 +436 val_436 1 +NULL val_437 1 +437 val_437 1 +NULL val_438 1 +438 val_438 1 +NULL val_439 1 +439 val_439 1 +NULL val_44 1 +44 val_44 1 +NULL val_443 1 +443 val_443 1 +NULL val_444 1 +444 val_444 1 +NULL val_446 1 +446 val_446 1 +NULL val_448 1 +448 val_448 1 +NULL val_449 1 +449 val_449 1 +NULL val_452 1 +452 val_452 1 +NULL val_453 1 +453 val_453 1 +NULL val_454 1 +454 val_454 1 +NULL val_455 1 +455 val_455 1 +NULL val_457 1 +457 val_457 1 +NULL val_458 1 +458 val_458 1 +NULL val_459 1 +459 val_459 1 +NULL val_460 1 +460 val_460 1 +NULL val_462 1 +462 val_462 1 +NULL val_463 1 +463 val_463 1 +NULL val_466 1 +466 val_466 1 +NULL val_467 1 +467 val_467 1 +NULL val_468 1 +468 val_468 1 +NULL val_469 1 +469 val_469 1 +NULL val_47 1 +47 val_47 1 +NULL val_470 1 +470 val_470 1 +NULL val_472 1 +472 val_472 1 +NULL val_475 1 +475 val_475 1 +NULL val_477 1 +477 val_477 1 +NULL val_478 1 +478 val_478 1 +NULL val_479 1 +479 val_479 1 +NULL val_480 1 +480 val_480 1 +NULL val_481 1 +481 val_481 1 +NULL val_482 1 +482 val_482 1 +NULL val_483 1 +483 val_483 1 +NULL val_484 1 +484 val_484 1 +NULL val_485 1 +485 val_485 1 +NULL val_487 1 +487 val_487 1 +NULL val_489 1 +489 val_489 1 +NULL val_490 1 +490 val_490 1 +NULL val_491 1 +491 val_491 1 +NULL val_492 1 +492 val_492 1 +NULL val_493 1 +493 val_493 1 +NULL val_494 1 +494 val_494 1 +NULL val_495 1 +495 val_495 1 +NULL val_496 1 +496 val_496 1 +NULL val_497 1 +497 val_497 1 +NULL val_498 1 +498 val_498 1 +NULL val_5 1 +5 val_5 1 +NULL 
val_51 1 +51 val_51 1 +NULL val_53 1 +53 val_53 1 +NULL val_54 1 +54 val_54 1 +NULL val_57 1 +57 val_57 1 +NULL val_58 1 +58 val_58 1 +NULL val_64 1 +64 val_64 1 +NULL val_65 1 +65 val_65 1 +NULL val_66 1 +66 val_66 1 +NULL val_67 1 +67 val_67 1 +NULL val_69 1 +69 val_69 1 +NULL val_70 1 +70 val_70 1 +NULL val_72 1 +72 val_72 1 +NULL val_74 1 +74 val_74 1 +NULL val_76 1 +76 val_76 1 +NULL val_77 1 +77 val_77 1 +NULL val_78 1 +78 val_78 1 +NULL val_8 1 +8 val_8 1 +NULL val_80 1 +80 val_80 1 +NULL val_82 1 +82 val_82 1 +NULL val_83 1 +83 val_83 1 +NULL val_84 1 +84 val_84 1 +NULL val_85 1 +85 val_85 1 +NULL val_86 1 +86 val_86 1 +NULL val_87 1 +87 val_87 1 +NULL val_9 1 +9 val_9 1 +NULL val_90 1 +90 val_90 1 +NULL val_92 1 +92 val_92 1 +NULL val_95 1 +95 val_95 1 +NULL val_96 1 +96 val_96 1 +NULL val_97 1 +97 val_97 1 +NULL val_98 1 +98 val_98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. 
(TOK_TABLE_OR_COL SRC) value)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: 
bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-09_514_246382705812768066/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: 
Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-24_235_410136376588786338/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-24_235_410136376588786338/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 
1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-24_376_1649462409682110652/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-24_376_1649462409682110652/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: 
dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 309 +0 NULL 1 +0 val_0 1 +10 NULL 1 +10 val_10 1 +100 NULL 1 +100 val_100 1 +103 NULL 1 +103 val_103 1 +104 NULL 1 +104 val_104 1 +105 NULL 1 +105 val_105 1 +11 NULL 1 +11 val_11 1 +111 NULL 1 +111 val_111 1 +113 NULL 1 +113 val_113 1 +114 NULL 1 +114 val_114 1 +116 NULL 1 +116 val_116 1 +118 NULL 1 +118 val_118 1 +119 NULL 1 +119 val_119 1 +12 NULL 1 +12 val_12 1 +120 NULL 1 +120 val_120 1 +125 NULL 1 +125 val_125 1 +126 NULL 1 +126 val_126 1 +128 NULL 1 +128 val_128 1 +129 NULL 1 +129 val_129 1 +131 NULL 1 +131 val_131 1 +133 NULL 1 +133 val_133 1 +134 NULL 1 +134 val_134 1 +136 NULL 1 +136 val_136 1 +137 NULL 1 +137 val_137 1 +138 NULL 1 +138 val_138 1 +143 NULL 1 +143 val_143 1 +145 NULL 1 +145 val_145 1 +146 NULL 1 +146 val_146 1 +149 NULL 1 +149 val_149 1 +15 NULL 1 +15 val_15 1 +150 NULL 1 +150 val_150 1 +152 NULL 1 +152 val_152 1 +153 NULL 1 +153 val_153 1 +155 NULL 1 +155 val_155 1 +156 NULL 1 +156 val_156 1 +157 
NULL 1 +157 val_157 1 +158 NULL 1 +158 val_158 1 +160 NULL 1 +160 val_160 1 +162 NULL 1 +162 val_162 1 +163 NULL 1 +163 val_163 1 +164 NULL 1 +164 val_164 1 +165 NULL 1 +165 val_165 1 +166 NULL 1 +166 val_166 1 +167 NULL 1 +167 val_167 1 +168 NULL 1 +168 val_168 1 +169 NULL 1 +169 val_169 1 +17 NULL 1 +17 val_17 1 +170 NULL 1 +170 val_170 1 +172 NULL 1 +172 val_172 1 +174 NULL 1 +174 val_174 1 +175 NULL 1 +175 val_175 1 +176 NULL 1 +176 val_176 1 +177 NULL 1 +177 val_177 1 +178 NULL 1 +178 val_178 1 +179 NULL 1 +179 val_179 1 +18 NULL 1 +18 val_18 1 +180 NULL 1 +180 val_180 1 +181 NULL 1 +181 val_181 1 +183 NULL 1 +183 val_183 1 +186 NULL 1 +186 val_186 1 +187 NULL 1 +187 val_187 1 +189 NULL 1 +189 val_189 1 +19 NULL 1 +19 val_19 1 +190 NULL 1 +190 val_190 1 +191 NULL 1 +191 val_191 1 +192 NULL 1 +192 val_192 1 +193 NULL 1 +193 val_193 1 +194 NULL 1 +194 val_194 1 +195 NULL 1 +195 val_195 1 +196 NULL 1 +196 val_196 1 +197 NULL 1 +197 val_197 1 +199 NULL 1 +199 val_199 1 +2 NULL 1 +2 val_2 1 +20 NULL 1 +20 val_20 1 +200 NULL 1 +200 val_200 1 +201 NULL 1 +201 val_201 1 +202 NULL 1 +202 val_202 1 +203 NULL 1 +203 val_203 1 +205 NULL 1 +205 val_205 1 +207 NULL 1 +207 val_207 1 +208 NULL 1 +208 val_208 1 +209 NULL 1 +209 val_209 1 +213 NULL 1 +213 val_213 1 +214 NULL 1 +214 val_214 1 +216 NULL 1 +216 val_216 1 +217 NULL 1 +217 val_217 1 +218 NULL 1 +218 val_218 1 +219 NULL 1 +219 val_219 1 +221 NULL 1 +221 val_221 1 +222 NULL 1 +222 val_222 1 +223 NULL 1 +223 val_223 1 +224 NULL 1 +224 val_224 1 +226 NULL 1 +226 val_226 1 +228 NULL 1 +228 val_228 1 +229 NULL 1 +229 val_229 1 +230 NULL 1 +230 val_230 1 +233 NULL 1 +233 val_233 1 +235 NULL 1 +235 val_235 1 +237 NULL 1 +237 val_237 1 +238 NULL 1 +238 val_238 1 +239 NULL 1 +239 val_239 1 +24 NULL 1 +24 val_24 1 +241 NULL 1 +241 val_241 1 +242 NULL 1 +242 val_242 1 +244 NULL 1 +244 val_244 1 +247 NULL 1 +247 val_247 1 +248 NULL 1 +248 val_248 1 +249 NULL 1 +249 val_249 1 +252 NULL 1 +252 val_252 1 +255 NULL 1 +255 val_255 1 
+256 NULL 1 +256 val_256 1 +257 NULL 1 +257 val_257 1 +258 NULL 1 +258 val_258 1 +26 NULL 1 +26 val_26 1 +260 NULL 1 +260 val_260 1 +262 NULL 1 +262 val_262 1 +263 NULL 1 +263 val_263 1 +265 NULL 1 +265 val_265 1 +266 NULL 1 +266 val_266 1 +27 NULL 1 +27 val_27 1 +272 NULL 1 +272 val_272 1 +273 NULL 1 +273 val_273 1 +274 NULL 1 +274 val_274 1 +275 NULL 1 +275 val_275 1 +277 NULL 1 +277 val_277 1 +278 NULL 1 +278 val_278 1 +28 NULL 1 +28 val_28 1 +280 NULL 1 +280 val_280 1 +281 NULL 1 +281 val_281 1 +282 NULL 1 +282 val_282 1 +283 NULL 1 +283 val_283 1 +284 NULL 1 +284 val_284 1 +285 NULL 1 +285 val_285 1 +286 NULL 1 +286 val_286 1 +287 NULL 1 +287 val_287 1 +288 NULL 1 +288 val_288 1 +289 NULL 1 +289 val_289 1 +291 NULL 1 +291 val_291 1 +292 NULL 1 +292 val_292 1 +296 NULL 1 +296 val_296 1 +298 NULL 1 +298 val_298 1 +30 NULL 1 +30 val_30 1 +302 NULL 1 +302 val_302 1 +305 NULL 1 +305 val_305 1 +306 NULL 1 +306 val_306 1 +307 NULL 1 +307 val_307 1 +308 NULL 1 +308 val_308 1 +309 NULL 1 +309 val_309 1 +310 NULL 1 +310 val_310 1 +311 NULL 1 +311 val_311 1 +315 NULL 1 +315 val_315 1 +316 NULL 1 +316 val_316 1 +317 NULL 1 +317 val_317 1 +318 NULL 1 +318 val_318 1 +321 NULL 1 +321 val_321 1 +322 NULL 1 +322 val_322 1 +323 NULL 1 +323 val_323 1 +325 NULL 1 +325 val_325 1 +327 NULL 1 +327 val_327 1 +33 NULL 1 +33 val_33 1 +331 NULL 1 +331 val_331 1 +332 NULL 1 +332 val_332 1 +333 NULL 1 +333 val_333 1 +335 NULL 1 +335 val_335 1 +336 NULL 1 +336 val_336 1 +338 NULL 1 +338 val_338 1 +339 NULL 1 +339 val_339 1 +34 NULL 1 +34 val_34 1 +341 NULL 1 +341 val_341 1 +342 NULL 1 +342 val_342 1 +344 NULL 1 +344 val_344 1 +345 NULL 1 +345 val_345 1 +348 NULL 1 +348 val_348 1 +35 NULL 1 +35 val_35 1 +351 NULL 1 +351 val_351 1 +353 NULL 1 +353 val_353 1 +356 NULL 1 +356 val_356 1 +360 NULL 1 +360 val_360 1 +362 NULL 1 +362 val_362 1 +364 NULL 1 +364 val_364 1 +365 NULL 1 +365 val_365 1 +366 NULL 1 +366 val_366 1 +367 NULL 1 +367 val_367 1 +368 NULL 1 +368 val_368 1 +369 NULL 1 +369 
val_369 1 +37 NULL 1 +37 val_37 1 +373 NULL 1 +373 val_373 1 +374 NULL 1 +374 val_374 1 +375 NULL 1 +375 val_375 1 +377 NULL 1 +377 val_377 1 +378 NULL 1 +378 val_378 1 +379 NULL 1 +379 val_379 1 +382 NULL 1 +382 val_382 1 +384 NULL 1 +384 val_384 1 +386 NULL 1 +386 val_386 1 +389 NULL 1 +389 val_389 1 +392 NULL 1 +392 val_392 1 +393 NULL 1 +393 val_393 1 +394 NULL 1 +394 val_394 1 +395 NULL 1 +395 val_395 1 +396 NULL 1 +396 val_396 1 +397 NULL 1 +397 val_397 1 +399 NULL 1 +399 val_399 1 +4 NULL 1 +4 val_4 1 +400 NULL 1 +400 val_400 1 +401 NULL 1 +401 val_401 1 +402 NULL 1 +402 val_402 1 +403 NULL 1 +403 val_403 1 +404 NULL 1 +404 val_404 1 +406 NULL 1 +406 val_406 1 +407 NULL 1 +407 val_407 1 +409 NULL 1 +409 val_409 1 +41 NULL 1 +41 val_41 1 +411 NULL 1 +411 val_411 1 +413 NULL 1 +413 val_413 1 +414 NULL 1 +414 val_414 1 +417 NULL 1 +417 val_417 1 +418 NULL 1 +418 val_418 1 +419 NULL 1 +419 val_419 1 +42 NULL 1 +42 val_42 1 +421 NULL 1 +421 val_421 1 +424 NULL 1 +424 val_424 1 +427 NULL 1 +427 val_427 1 +429 NULL 1 +429 val_429 1 +43 NULL 1 +43 val_43 1 +430 NULL 1 +430 val_430 1 +431 NULL 1 +431 val_431 1 +432 NULL 1 +432 val_432 1 +435 NULL 1 +435 val_435 1 +436 NULL 1 +436 val_436 1 +437 NULL 1 +437 val_437 1 +438 NULL 1 +438 val_438 1 +439 NULL 1 +439 val_439 1 +44 NULL 1 +44 val_44 1 +443 NULL 1 +443 val_443 1 +444 NULL 1 +444 val_444 1 +446 NULL 1 +446 val_446 1 +448 NULL 1 +448 val_448 1 +449 NULL 1 +449 val_449 1 +452 NULL 1 +452 val_452 1 +453 NULL 1 +453 val_453 1 +454 NULL 1 +454 val_454 1 +455 NULL 1 +455 val_455 1 +457 NULL 1 +457 val_457 1 +458 NULL 1 +458 val_458 1 +459 NULL 1 +459 val_459 1 +460 NULL 1 +460 val_460 1 +462 NULL 1 +462 val_462 1 +463 NULL 1 +463 val_463 1 +466 NULL 1 +466 val_466 1 +467 NULL 1 +467 val_467 1 +468 NULL 1 +468 val_468 1 +469 NULL 1 +469 val_469 1 +47 NULL 1 +47 val_47 1 +470 NULL 1 +470 val_470 1 +472 NULL 1 +472 val_472 1 +475 NULL 1 +475 val_475 1 +477 NULL 1 +477 val_477 1 +478 NULL 1 +478 val_478 1 +479 NULL 1 
+479 val_479 1 +480 NULL 1 +480 val_480 1 +481 NULL 1 +481 val_481 1 +482 NULL 1 +482 val_482 1 +483 NULL 1 +483 val_483 1 +484 NULL 1 +484 val_484 1 +485 NULL 1 +485 val_485 1 +487 NULL 1 +487 val_487 1 +489 NULL 1 +489 val_489 1 +490 NULL 1 +490 val_490 1 +491 NULL 1 +491 val_491 1 +492 NULL 1 +492 val_492 1 +493 NULL 1 +493 val_493 1 +494 NULL 1 +494 val_494 1 +495 NULL 1 +495 val_495 1 +496 NULL 1 +496 val_496 1 +497 NULL 1 +497 val_497 1 +498 NULL 1 +498 val_498 1 +5 NULL 1 +5 val_5 1 +51 NULL 1 +51 val_51 1 +53 NULL 1 +53 val_53 1 +54 NULL 1 +54 val_54 1 +57 NULL 1 +57 val_57 1 +58 NULL 1 +58 val_58 1 +64 NULL 1 +64 val_64 1 +65 NULL 1 +65 val_65 1 +66 NULL 1 +66 val_66 1 +67 NULL 1 +67 val_67 1 +69 NULL 1 +69 val_69 1 +70 NULL 1 +70 val_70 1 +72 NULL 1 +72 val_72 1 +74 NULL 1 +74 val_74 1 +76 NULL 1 +76 val_76 1 +77 NULL 1 +77 val_77 1 +78 NULL 1 +78 val_78 1 +8 NULL 1 +8 val_8 1 +80 NULL 1 +80 val_80 1 +82 NULL 1 +82 val_82 1 +83 NULL 1 +83 val_83 1 +84 NULL 1 +84 val_84 1 +85 NULL 1 +85 val_85 1 +86 NULL 1 +86 val_86 1 +87 NULL 1 +87 val_87 1 +9 NULL 1 +9 val_9 1 +90 NULL 1 +90 val_90 1 +92 NULL 1 +92 val_92 1 +95 NULL 1 +95 val_95 1 +96 NULL 1 +96 val_96 1 +97 NULL 1 +97 val_97 1 +98 NULL 1 +98 val_98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTION COUNT (TOK_FUNCTION SUBSTR (. 
(TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + 
compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-24_532_4196964248421480997/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-38_348_5252785707815916986/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-38_348_5252785707815916986/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 500 +0 3 +10 1 +100 2 +103 2 +104 2 +105 1 +11 1 +111 1 +113 2 +114 1 +116 1 +118 2 +119 3 +12 2 +120 2 +125 2 +126 1 +128 3 +129 2 +131 1 +133 1 +134 2 +136 1 +137 2 +138 4 +143 1 +145 1 +146 2 +149 2 +15 2 
+150 1 +152 2 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 2 +165 2 +166 1 +167 3 +168 1 +169 4 +17 1 +170 1 +172 2 +174 2 +175 2 +176 2 +177 1 +178 1 +179 2 +18 2 +180 1 +181 1 +183 1 +186 1 +187 3 +189 1 +19 1 +190 1 +191 2 +192 1 +193 3 +194 1 +195 2 +196 1 +197 2 +199 3 +2 1 +20 1 +200 2 +201 1 +202 1 +203 2 +205 2 +207 2 +208 3 +209 2 +213 2 +214 1 +216 2 +217 2 +218 1 +219 2 +221 2 +222 1 +223 2 +224 2 +226 1 +228 1 +229 2 +230 5 +233 2 +235 1 +237 2 +238 2 +239 2 +24 2 +241 1 +242 2 +244 1 +247 1 +248 1 +249 1 +252 1 +255 2 +256 2 +257 1 +258 1 +26 2 +260 1 +262 1 +263 1 +265 2 +266 1 +27 1 +272 2 +273 3 +274 1 +275 1 +277 4 +278 2 +28 1 +280 2 +281 2 +282 2 +283 1 +284 1 +285 1 +286 1 +287 1 +288 2 +289 1 +291 1 +292 1 +296 1 +298 3 +30 1 +302 1 +305 1 +306 1 +307 2 +308 1 +309 2 +310 1 +311 3 +315 1 +316 3 +317 2 +318 3 +321 2 +322 2 +323 1 +325 2 +327 3 +33 1 +331 2 +332 1 +333 2 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 2 +344 2 +345 1 +348 5 +35 3 +351 1 +353 2 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 2 +368 1 +369 3 +37 2 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 2 +384 3 +386 1 +389 1 +392 1 +393 1 +394 1 +395 2 +396 3 +397 2 +399 2 +4 1 +400 1 +401 5 +402 1 +403 3 +404 2 +406 4 +407 1 +409 3 +41 1 +411 1 +413 2 +414 2 +417 3 +418 1 +419 1 +42 2 +421 1 +424 2 +427 1 +429 2 +43 1 +430 3 +431 3 +432 1 +435 1 +436 1 +437 1 +438 3 +439 2 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +47 1 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 +5 3 +51 2 +53 1 +54 1 +57 1 +58 2 +64 1 +65 1 +66 1 +67 2 +69 1 +70 3 +72 2 +74 1 +76 2 +77 1 +78 1 +8 1 +80 1 +82 1 +83 2 +84 2 +85 1 +86 1 +87 1 +9 1 +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-38_490_7895989105255845341/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-38_490_7895989105255845341/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 500 +0 NULL 3 +0 val_0 3 +10 NULL 1 +10 val_10 1 +100 NULL 2 +100 val_100 2 +103 NULL 2 +103 val_103 2 +104 NULL 2 +104 val_104 2 +105 NULL 1 +105 val_105 1 +11 NULL 1 +11 val_11 1 +111 NULL 1 +111 val_111 1 +113 NULL 2 +113 val_113 2 +114 NULL 1 +114 val_114 1 +116 NULL 1 +116 val_116 1 +118 NULL 2 +118 val_118 2 +119 NULL 3 +119 val_119 3 +12 NULL 2 +12 val_12 2 +120 NULL 2 +120 val_120 2 +125 NULL 2 +125 val_125 2 +126 NULL 1 +126 val_126 1 +128 NULL 3 +128 val_128 3 +129 NULL 2 +129 val_129 2 +131 NULL 1 +131 val_131 1 +133 NULL 1 +133 val_133 1 +134 NULL 2 +134 val_134 2 +136 NULL 1 +136 val_136 1 +137 NULL 2 +137 val_137 2 +138 NULL 4 +138 val_138 4 +143 NULL 1 +143 val_143 1 +145 NULL 1 +145 val_145 1 +146 NULL 2 +146 val_146 2 +149 NULL 2 +149 val_149 2 +15 NULL 2 +15 val_15 2 +150 NULL 1 +150 val_150 1 +152 NULL 2 +152 val_152 2 +153 NULL 1 +153 val_153 1 +155 NULL 1 +155 val_155 1 +156 NULL 1 +156 val_156 1 +157 NULL 1 +157 val_157 1 +158 NULL 1 +158 val_158 1 +160 NULL 1 +160 val_160 1 +162 NULL 1 +162 val_162 1 +163 NULL 1 +163 val_163 1 +164 NULL 2 +164 val_164 2 +165 NULL 2 +165 val_165 2 +166 NULL 1 +166 val_166 1 +167 NULL 3 +167 val_167 3 +168 NULL 1 +168 val_168 1 +169 NULL 4 +169 val_169 4 +17 NULL 1 +17 val_17 1 +170 NULL 1 +170 val_170 1 +172 NULL 2 +172 val_172 2 +174 NULL 2 +174 val_174 2 +175 NULL 2 +175 val_175 2 +176 NULL 2 +176 val_176 2 +177 NULL 1 +177 val_177 1 +178 NULL 1 +178 val_178 1 +179 
NULL 2 +179 val_179 2 +18 NULL 2 +18 val_18 2 +180 NULL 1 +180 val_180 1 +181 NULL 1 +181 val_181 1 +183 NULL 1 +183 val_183 1 +186 NULL 1 +186 val_186 1 +187 NULL 3 +187 val_187 3 +189 NULL 1 +189 val_189 1 +19 NULL 1 +19 val_19 1 +190 NULL 1 +190 val_190 1 +191 NULL 2 +191 val_191 2 +192 NULL 1 +192 val_192 1 +193 NULL 3 +193 val_193 3 +194 NULL 1 +194 val_194 1 +195 NULL 2 +195 val_195 2 +196 NULL 1 +196 val_196 1 +197 NULL 2 +197 val_197 2 +199 NULL 3 +199 val_199 3 +2 NULL 1 +2 val_2 1 +20 NULL 1 +20 val_20 1 +200 NULL 2 +200 val_200 2 +201 NULL 1 +201 val_201 1 +202 NULL 1 +202 val_202 1 +203 NULL 2 +203 val_203 2 +205 NULL 2 +205 val_205 2 +207 NULL 2 +207 val_207 2 +208 NULL 3 +208 val_208 3 +209 NULL 2 +209 val_209 2 +213 NULL 2 +213 val_213 2 +214 NULL 1 +214 val_214 1 +216 NULL 2 +216 val_216 2 +217 NULL 2 +217 val_217 2 +218 NULL 1 +218 val_218 1 +219 NULL 2 +219 val_219 2 +221 NULL 2 +221 val_221 2 +222 NULL 1 +222 val_222 1 +223 NULL 2 +223 val_223 2 +224 NULL 2 +224 val_224 2 +226 NULL 1 +226 val_226 1 +228 NULL 1 +228 val_228 1 +229 NULL 2 +229 val_229 2 +230 NULL 5 +230 val_230 5 +233 NULL 2 +233 val_233 2 +235 NULL 1 +235 val_235 1 +237 NULL 2 +237 val_237 2 +238 NULL 2 +238 val_238 2 +239 NULL 2 +239 val_239 2 +24 NULL 2 +24 val_24 2 +241 NULL 1 +241 val_241 1 +242 NULL 2 +242 val_242 2 +244 NULL 1 +244 val_244 1 +247 NULL 1 +247 val_247 1 +248 NULL 1 +248 val_248 1 +249 NULL 1 +249 val_249 1 +252 NULL 1 +252 val_252 1 +255 NULL 2 +255 val_255 2 +256 NULL 2 +256 val_256 2 +257 NULL 1 +257 val_257 1 +258 NULL 1 +258 val_258 1 +26 NULL 2 +26 val_26 2 +260 NULL 1 +260 val_260 1 +262 NULL 1 +262 val_262 1 +263 NULL 1 +263 val_263 1 +265 NULL 2 +265 val_265 2 +266 NULL 1 +266 val_266 1 +27 NULL 1 +27 val_27 1 +272 NULL 2 +272 val_272 2 +273 NULL 3 +273 val_273 3 +274 NULL 1 +274 val_274 1 +275 NULL 1 +275 val_275 1 +277 NULL 4 +277 val_277 4 +278 NULL 2 +278 val_278 2 +28 NULL 1 +28 val_28 1 +280 NULL 2 +280 val_280 2 +281 NULL 2 +281 val_281 2 +282 
NULL 2 +282 val_282 2 +283 NULL 1 +283 val_283 1 +284 NULL 1 +284 val_284 1 +285 NULL 1 +285 val_285 1 +286 NULL 1 +286 val_286 1 +287 NULL 1 +287 val_287 1 +288 NULL 2 +288 val_288 2 +289 NULL 1 +289 val_289 1 +291 NULL 1 +291 val_291 1 +292 NULL 1 +292 val_292 1 +296 NULL 1 +296 val_296 1 +298 NULL 3 +298 val_298 3 +30 NULL 1 +30 val_30 1 +302 NULL 1 +302 val_302 1 +305 NULL 1 +305 val_305 1 +306 NULL 1 +306 val_306 1 +307 NULL 2 +307 val_307 2 +308 NULL 1 +308 val_308 1 +309 NULL 2 +309 val_309 2 +310 NULL 1 +310 val_310 1 +311 NULL 3 +311 val_311 3 +315 NULL 1 +315 val_315 1 +316 NULL 3 +316 val_316 3 +317 NULL 2 +317 val_317 2 +318 NULL 3 +318 val_318 3 +321 NULL 2 +321 val_321 2 +322 NULL 2 +322 val_322 2 +323 NULL 1 +323 val_323 1 +325 NULL 2 +325 val_325 2 +327 NULL 3 +327 val_327 3 +33 NULL 1 +33 val_33 1 +331 NULL 2 +331 val_331 2 +332 NULL 1 +332 val_332 1 +333 NULL 2 +333 val_333 2 +335 NULL 1 +335 val_335 1 +336 NULL 1 +336 val_336 1 +338 NULL 1 +338 val_338 1 +339 NULL 1 +339 val_339 1 +34 NULL 1 +34 val_34 1 +341 NULL 1 +341 val_341 1 +342 NULL 2 +342 val_342 2 +344 NULL 2 +344 val_344 2 +345 NULL 1 +345 val_345 1 +348 NULL 5 +348 val_348 5 +35 NULL 3 +35 val_35 3 +351 NULL 1 +351 val_351 1 +353 NULL 2 +353 val_353 2 +356 NULL 1 +356 val_356 1 +360 NULL 1 +360 val_360 1 +362 NULL 1 +362 val_362 1 +364 NULL 1 +364 val_364 1 +365 NULL 1 +365 val_365 1 +366 NULL 1 +366 val_366 1 +367 NULL 2 +367 val_367 2 +368 NULL 1 +368 val_368 1 +369 NULL 3 +369 val_369 3 +37 NULL 2 +37 val_37 2 +373 NULL 1 +373 val_373 1 +374 NULL 1 +374 val_374 1 +375 NULL 1 +375 val_375 1 +377 NULL 1 +377 val_377 1 +378 NULL 1 +378 val_378 1 +379 NULL 1 +379 val_379 1 +382 NULL 2 +382 val_382 2 +384 NULL 3 +384 val_384 3 +386 NULL 1 +386 val_386 1 +389 NULL 1 +389 val_389 1 +392 NULL 1 +392 val_392 1 +393 NULL 1 +393 val_393 1 +394 NULL 1 +394 val_394 1 +395 NULL 2 +395 val_395 2 +396 NULL 3 +396 val_396 3 +397 NULL 2 +397 val_397 2 +399 NULL 2 +399 val_399 2 +4 NULL 1 +4 val_4 1 
+400 NULL 1 +400 val_400 1 +401 NULL 5 +401 val_401 5 +402 NULL 1 +402 val_402 1 +403 NULL 3 +403 val_403 3 +404 NULL 2 +404 val_404 2 +406 NULL 4 +406 val_406 4 +407 NULL 1 +407 val_407 1 +409 NULL 3 +409 val_409 3 +41 NULL 1 +41 val_41 1 +411 NULL 1 +411 val_411 1 +413 NULL 2 +413 val_413 2 +414 NULL 2 +414 val_414 2 +417 NULL 3 +417 val_417 3 +418 NULL 1 +418 val_418 1 +419 NULL 1 +419 val_419 1 +42 NULL 2 +42 val_42 2 +421 NULL 1 +421 val_421 1 +424 NULL 2 +424 val_424 2 +427 NULL 1 +427 val_427 1 +429 NULL 2 +429 val_429 2 +43 NULL 1 +43 val_43 1 +430 NULL 3 +430 val_430 3 +431 NULL 3 +431 val_431 3 +432 NULL 1 +432 val_432 1 +435 NULL 1 +435 val_435 1 +436 NULL 1 +436 val_436 1 +437 NULL 1 +437 val_437 1 +438 NULL 3 +438 val_438 3 +439 NULL 2 +439 val_439 2 +44 NULL 1 +44 val_44 1 +443 NULL 1 +443 val_443 1 +444 NULL 1 +444 val_444 1 +446 NULL 1 +446 val_446 1 +448 NULL 1 +448 val_448 1 +449 NULL 1 +449 val_449 1 +452 NULL 1 +452 val_452 1 +453 NULL 1 +453 val_453 1 +454 NULL 3 +454 val_454 3 +455 NULL 1 +455 val_455 1 +457 NULL 1 +457 val_457 1 +458 NULL 2 +458 val_458 2 +459 NULL 2 +459 val_459 2 +460 NULL 1 +460 val_460 1 +462 NULL 2 +462 val_462 2 +463 NULL 2 +463 val_463 2 +466 NULL 3 +466 val_466 3 +467 NULL 1 +467 val_467 1 +468 NULL 4 +468 val_468 4 +469 NULL 5 +469 val_469 5 +47 NULL 1 +47 val_47 1 +470 NULL 1 +470 val_470 1 +472 NULL 1 +472 val_472 1 +475 NULL 1 +475 val_475 1 +477 NULL 1 +477 val_477 1 +478 NULL 2 +478 val_478 2 +479 NULL 1 +479 val_479 1 +480 NULL 3 +480 val_480 3 +481 NULL 1 +481 val_481 1 +482 NULL 1 +482 val_482 1 +483 NULL 1 +483 val_483 1 +484 NULL 1 +484 val_484 1 +485 NULL 1 +485 val_485 1 +487 NULL 1 +487 val_487 1 +489 NULL 4 +489 val_489 4 +490 NULL 1 +490 val_490 1 +491 NULL 1 +491 val_491 1 +492 NULL 2 +492 val_492 2 +493 NULL 1 +493 val_493 1 +494 NULL 1 +494 val_494 1 +495 NULL 1 +495 val_495 1 +496 NULL 1 +496 val_496 1 +497 NULL 1 +497 val_497 1 +498 NULL 3 +498 val_498 3 +5 NULL 3 +5 val_5 3 +51 NULL 2 +51 val_51 
2 +53 NULL 1 +53 val_53 1 +54 NULL 1 +54 val_54 1 +57 NULL 1 +57 val_57 1 +58 NULL 2 +58 val_58 2 +64 NULL 1 +64 val_64 1 +65 NULL 1 +65 val_65 1 +66 NULL 1 +66 val_66 1 +67 NULL 2 +67 val_67 2 +69 NULL 1 +69 val_69 1 +70 NULL 3 +70 val_70 3 +72 NULL 2 +72 val_72 2 +74 NULL 1 +74 val_74 1 +76 NULL 2 +76 val_76 2 +77 NULL 1 +77 val_77 1 +78 NULL 1 +78 val_78 1 +8 NULL 1 +8 val_8 1 +80 NULL 1 +80 val_80 1 +82 NULL 1 +82 val_82 1 +83 NULL 2 +83 val_83 2 +84 NULL 2 +84 val_84 2 +85 NULL 1 +85 val_85 1 +86 NULL 1 +86 val_86 1 +87 NULL 1 +87 val_87 1 +9 NULL 1 +9 val_9 1 +90 NULL 3 +90 val_90 3 +92 NULL 1 +92 val_92 1 +95 NULL 2 +95 val_95 2 +96 NULL 1 +96 val_96 1 +97 NULL 2 +97 val_97 2 +98 NULL 2 +98 val_98 2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value 
EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL SRC) value)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) value) (. (TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: value + type: string + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: 
_col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-38_676_3386485891757405528/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: string + sort order: +++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col2:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col1 + type: string + expr: _col0 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-53_189_7587704048262375160/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-53_189_7587704048262375160/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 
+266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-53_350_8653115157722259984/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-18-53_350_8653115157722259984/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: 
Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL NULL 309 +NULL val_0 1 +0 val_0 1 +NULL val_10 1 +10 val_10 1 +NULL val_100 1 +100 val_100 1 +NULL val_103 1 +103 val_103 1 +NULL val_104 1 +104 val_104 1 +NULL val_105 1 +105 val_105 1 +NULL val_11 1 +11 val_11 1 +NULL val_111 1 +111 val_111 1 +NULL val_113 1 +113 val_113 1 +NULL val_114 1 +114 val_114 1 +NULL val_116 1 +116 val_116 1 +NULL val_118 1 +118 val_118 1 +NULL val_119 1 +119 val_119 1 +NULL val_12 1 +12 val_12 1 +NULL val_120 1 +120 val_120 1 +NULL val_125 1 +125 val_125 1 +NULL val_126 1 +126 val_126 1 +NULL val_128 1 +128 val_128 1 +NULL val_129 1 +129 val_129 1 +NULL val_131 1 +131 val_131 1 +NULL val_133 1 +133 val_133 1 +NULL val_134 1 +134 val_134 1 +NULL val_136 1 +136 val_136 1 +NULL val_137 1 +137 val_137 1 +NULL val_138 1 +138 val_138 1 +NULL val_143 1 +143 val_143 1 +NULL val_145 1 +145 val_145 1 +NULL val_146 1 +146 val_146 1 +NULL val_149 1 +149 val_149 1 +NULL val_15 1 +15 val_15 1 +NULL val_150 1 +150 val_150 1 +NULL val_152 1 +152 val_152 1 +NULL val_153 1 +153 val_153 1 +NULL val_155 1 +155 val_155 1 +NULL val_156 1 +156 val_156 1 +NULL val_157 1 +157 val_157 1 +NULL val_158 1 +158 val_158 1 +NULL val_160 1 +160 val_160 1 +NULL val_162 1 +162 val_162 1 +NULL val_163 1 +163 val_163 1 +NULL val_164 1 +164 val_164 1 +NULL val_165 1 +165 val_165 1 +NULL val_166 1 +166 val_166 1 +NULL val_167 1 +167 val_167 1 +NULL val_168 1 +168 val_168 1 +NULL val_169 1 +169 val_169 1 +NULL val_17 1 +17 val_17 1 +NULL val_170 1 +170 val_170 1 +NULL val_172 1 +172 val_172 1 +NULL val_174 1 
+174 val_174 1 +NULL val_175 1 +175 val_175 1 +NULL val_176 1 +176 val_176 1 +NULL val_177 1 +177 val_177 1 +NULL val_178 1 +178 val_178 1 +NULL val_179 1 +179 val_179 1 +NULL val_18 1 +18 val_18 1 +NULL val_180 1 +180 val_180 1 +NULL val_181 1 +181 val_181 1 +NULL val_183 1 +183 val_183 1 +NULL val_186 1 +186 val_186 1 +NULL val_187 1 +187 val_187 1 +NULL val_189 1 +189 val_189 1 +NULL val_19 1 +19 val_19 1 +NULL val_190 1 +190 val_190 1 +NULL val_191 1 +191 val_191 1 +NULL val_192 1 +192 val_192 1 +NULL val_193 1 +193 val_193 1 +NULL val_194 1 +194 val_194 1 +NULL val_195 1 +195 val_195 1 +NULL val_196 1 +196 val_196 1 +NULL val_197 1 +197 val_197 1 +NULL val_199 1 +199 val_199 1 +NULL val_2 1 +2 val_2 1 +NULL val_20 1 +20 val_20 1 +NULL val_200 1 +200 val_200 1 +NULL val_201 1 +201 val_201 1 +NULL val_202 1 +202 val_202 1 +NULL val_203 1 +203 val_203 1 +NULL val_205 1 +205 val_205 1 +NULL val_207 1 +207 val_207 1 +NULL val_208 1 +208 val_208 1 +NULL val_209 1 +209 val_209 1 +NULL val_213 1 +213 val_213 1 +NULL val_214 1 +214 val_214 1 +NULL val_216 1 +216 val_216 1 +NULL val_217 1 +217 val_217 1 +NULL val_218 1 +218 val_218 1 +NULL val_219 1 +219 val_219 1 +NULL val_221 1 +221 val_221 1 +NULL val_222 1 +222 val_222 1 +NULL val_223 1 +223 val_223 1 +NULL val_224 1 +224 val_224 1 +NULL val_226 1 +226 val_226 1 +NULL val_228 1 +228 val_228 1 +NULL val_229 1 +229 val_229 1 +NULL val_230 1 +230 val_230 1 +NULL val_233 1 +233 val_233 1 +NULL val_235 1 +235 val_235 1 +NULL val_237 1 +237 val_237 1 +NULL val_238 1 +238 val_238 1 +NULL val_239 1 +239 val_239 1 +NULL val_24 1 +24 val_24 1 +NULL val_241 1 +241 val_241 1 +NULL val_242 1 +242 val_242 1 +NULL val_244 1 +244 val_244 1 +NULL val_247 1 +247 val_247 1 +NULL val_248 1 +248 val_248 1 +NULL val_249 1 +249 val_249 1 +NULL val_252 1 +252 val_252 1 +NULL val_255 1 +255 val_255 1 +NULL val_256 1 +256 val_256 1 +NULL val_257 1 +257 val_257 1 +NULL val_258 1 +258 val_258 1 +NULL val_26 1 +26 val_26 1 +NULL val_260 1 +260 
val_260 1 +NULL val_262 1 +262 val_262 1 +NULL val_263 1 +263 val_263 1 +NULL val_265 1 +265 val_265 1 +NULL val_266 1 +266 val_266 1 +NULL val_27 1 +27 val_27 1 +NULL val_272 1 +272 val_272 1 +NULL val_273 1 +273 val_273 1 +NULL val_274 1 +274 val_274 1 +NULL val_275 1 +275 val_275 1 +NULL val_277 1 +277 val_277 1 +NULL val_278 1 +278 val_278 1 +NULL val_28 1 +28 val_28 1 +NULL val_280 1 +280 val_280 1 +NULL val_281 1 +281 val_281 1 +NULL val_282 1 +282 val_282 1 +NULL val_283 1 +283 val_283 1 +NULL val_284 1 +284 val_284 1 +NULL val_285 1 +285 val_285 1 +NULL val_286 1 +286 val_286 1 +NULL val_287 1 +287 val_287 1 +NULL val_288 1 +288 val_288 1 +NULL val_289 1 +289 val_289 1 +NULL val_291 1 +291 val_291 1 +NULL val_292 1 +292 val_292 1 +NULL val_296 1 +296 val_296 1 +NULL val_298 1 +298 val_298 1 +NULL val_30 1 +30 val_30 1 +NULL val_302 1 +302 val_302 1 +NULL val_305 1 +305 val_305 1 +NULL val_306 1 +306 val_306 1 +NULL val_307 1 +307 val_307 1 +NULL val_308 1 +308 val_308 1 +NULL val_309 1 +309 val_309 1 +NULL val_310 1 +310 val_310 1 +NULL val_311 1 +311 val_311 1 +NULL val_315 1 +315 val_315 1 +NULL val_316 1 +316 val_316 1 +NULL val_317 1 +317 val_317 1 +NULL val_318 1 +318 val_318 1 +NULL val_321 1 +321 val_321 1 +NULL val_322 1 +322 val_322 1 +NULL val_323 1 +323 val_323 1 +NULL val_325 1 +325 val_325 1 +NULL val_327 1 +327 val_327 1 +NULL val_33 1 +33 val_33 1 +NULL val_331 1 +331 val_331 1 +NULL val_332 1 +332 val_332 1 +NULL val_333 1 +333 val_333 1 +NULL val_335 1 +335 val_335 1 +NULL val_336 1 +336 val_336 1 +NULL val_338 1 +338 val_338 1 +NULL val_339 1 +339 val_339 1 +NULL val_34 1 +34 val_34 1 +NULL val_341 1 +341 val_341 1 +NULL val_342 1 +342 val_342 1 +NULL val_344 1 +344 val_344 1 +NULL val_345 1 +345 val_345 1 +NULL val_348 1 +348 val_348 1 +NULL val_35 1 +35 val_35 1 +NULL val_351 1 +351 val_351 1 +NULL val_353 1 +353 val_353 1 +NULL val_356 1 +356 val_356 1 +NULL val_360 1 +360 val_360 1 +NULL val_362 1 +362 val_362 1 +NULL val_364 1 +364 
val_364 1 +NULL val_365 1 +365 val_365 1 +NULL val_366 1 +366 val_366 1 +NULL val_367 1 +367 val_367 1 +NULL val_368 1 +368 val_368 1 +NULL val_369 1 +369 val_369 1 +NULL val_37 1 +37 val_37 1 +NULL val_373 1 +373 val_373 1 +NULL val_374 1 +374 val_374 1 +NULL val_375 1 +375 val_375 1 +NULL val_377 1 +377 val_377 1 +NULL val_378 1 +378 val_378 1 +NULL val_379 1 +379 val_379 1 +NULL val_382 1 +382 val_382 1 +NULL val_384 1 +384 val_384 1 +NULL val_386 1 +386 val_386 1 +NULL val_389 1 +389 val_389 1 +NULL val_392 1 +392 val_392 1 +NULL val_393 1 +393 val_393 1 +NULL val_394 1 +394 val_394 1 +NULL val_395 1 +395 val_395 1 +NULL val_396 1 +396 val_396 1 +NULL val_397 1 +397 val_397 1 +NULL val_399 1 +399 val_399 1 +NULL val_4 1 +4 val_4 1 +NULL val_400 1 +400 val_400 1 +NULL val_401 1 +401 val_401 1 +NULL val_402 1 +402 val_402 1 +NULL val_403 1 +403 val_403 1 +NULL val_404 1 +404 val_404 1 +NULL val_406 1 +406 val_406 1 +NULL val_407 1 +407 val_407 1 +NULL val_409 1 +409 val_409 1 +NULL val_41 1 +41 val_41 1 +NULL val_411 1 +411 val_411 1 +NULL val_413 1 +413 val_413 1 +NULL val_414 1 +414 val_414 1 +NULL val_417 1 +417 val_417 1 +NULL val_418 1 +418 val_418 1 +NULL val_419 1 +419 val_419 1 +NULL val_42 1 +42 val_42 1 +NULL val_421 1 +421 val_421 1 +NULL val_424 1 +424 val_424 1 +NULL val_427 1 +427 val_427 1 +NULL val_429 1 +429 val_429 1 +NULL val_43 1 +43 val_43 1 +NULL val_430 1 +430 val_430 1 +NULL val_431 1 +431 val_431 1 +NULL val_432 1 +432 val_432 1 +NULL val_435 1 +435 val_435 1 +NULL val_436 1 +436 val_436 1 +NULL val_437 1 +437 val_437 1 +NULL val_438 1 +438 val_438 1 +NULL val_439 1 +439 val_439 1 +NULL val_44 1 +44 val_44 1 +NULL val_443 1 +443 val_443 1 +NULL val_444 1 +444 val_444 1 +NULL val_446 1 +446 val_446 1 +NULL val_448 1 +448 val_448 1 +NULL val_449 1 +449 val_449 1 +NULL val_452 1 +452 val_452 1 +NULL val_453 1 +453 val_453 1 +NULL val_454 1 +454 val_454 1 +NULL val_455 1 +455 val_455 1 +NULL val_457 1 +457 val_457 1 +NULL val_458 1 +458 
val_458 1 +NULL val_459 1 +459 val_459 1 +NULL val_460 1 +460 val_460 1 +NULL val_462 1 +462 val_462 1 +NULL val_463 1 +463 val_463 1 +NULL val_466 1 +466 val_466 1 +NULL val_467 1 +467 val_467 1 +NULL val_468 1 +468 val_468 1 +NULL val_469 1 +469 val_469 1 +NULL val_47 1 +47 val_47 1 +NULL val_470 1 +470 val_470 1 +NULL val_472 1 +472 val_472 1 +NULL val_475 1 +475 val_475 1 +NULL val_477 1 +477 val_477 1 +NULL val_478 1 +478 val_478 1 +NULL val_479 1 +479 val_479 1 +NULL val_480 1 +480 val_480 1 +NULL val_481 1 +481 val_481 1 +NULL val_482 1 +482 val_482 1 +NULL val_483 1 +483 val_483 1 +NULL val_484 1 +484 val_484 1 +NULL val_485 1 +485 val_485 1 +NULL val_487 1 +487 val_487 1 +NULL val_489 1 +489 val_489 1 +NULL val_490 1 +490 val_490 1 +NULL val_491 1 +491 val_491 1 +NULL val_492 1 +492 val_492 1 +NULL val_493 1 +493 val_493 1 +NULL val_494 1 +494 val_494 1 +NULL val_495 1 +495 val_495 1 +NULL val_496 1 +496 val_496 1 +NULL val_497 1 +497 val_497 1 +NULL val_498 1 +498 val_498 1 +NULL val_5 1 +5 val_5 1 +NULL val_51 1 +51 val_51 1 +NULL val_53 1 +53 val_53 1 +NULL val_54 1 +54 val_54 1 +NULL val_57 1 +57 val_57 1 +NULL val_58 1 +58 val_58 1 +NULL val_64 1 +64 val_64 1 +NULL val_65 1 +65 val_65 1 +NULL val_66 1 +66 val_66 1 +NULL val_67 1 +67 val_67 1 +NULL val_69 1 +69 val_69 1 +NULL val_70 1 +70 val_70 1 +NULL val_72 1 +72 val_72 1 +NULL val_74 1 +74 val_74 1 +NULL val_76 1 +76 val_76 1 +NULL val_77 1 +77 val_77 1 +NULL val_78 1 +78 val_78 1 +NULL val_8 1 +8 val_8 1 +NULL val_80 1 +80 val_80 1 +NULL val_82 1 +82 val_82 1 +NULL val_83 1 +83 val_83 1 +NULL val_84 1 +84 val_84 1 +NULL val_85 1 +85 val_85 1 +NULL val_86 1 +86 val_86 1 +NULL val_87 1 +87 val_87 1 +NULL val_9 1 +9 val_9 1 +NULL val_90 1 +90 val_90 1 +NULL val_92 1 +92 val_92 1 +NULL val_95 1 +95 val_95 1 +NULL val_96 1 +96 val_96 1 +NULL val_97 1 +97 val_97 1 +NULL val_98 1 +98 val_98 1 Index: ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct_withrollup.q.out 
=================================================================== --- ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct_withrollup.q.out (revision 0) @@ -0,0 +1,197 @@ +PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_g2 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + tag: -1 + value expressions: + expr: value + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1) + expr: sum(KEY._col1) + expr: sum(DISTINCT KEY._col1) + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-12-56_387_8905212162823898328/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + expr: _col4 + type: double + expr: _col5 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + expr: sum(DISTINCT KEY._col1:1._col0) + expr: count(VALUE._col3) + bucketGroup: false + keys: + 
expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + expr: _col3 + type: double + expr: _col4 + type: bigint + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + expr: UDFToInteger(_col3) + type: int + expr: UDFToInteger(_col4) + type: int + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g2 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g2.* FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-08_794_4899292286868200865/-mr-10000 +POSTHOOK: query: SELECT dest_g2.* FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-13-08_794_4899292286868200865/-mr-10000 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL 79136 500 +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 Index: ql/src/test/results/clientpositive/groupby8_map_withrollup.q.out =================================================================== --- 
ql/src/test/results/clientpositive/groupby8_map_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby8_map_withrollup.q.out (revision 0) @@ -0,0 +1,867 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 
+ File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-29_846_6422754565721906082/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-43_365_6420774695068808152/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-43_365_6420774695068808152/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 
1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-43_502_2829850728482786063/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-53-43_502_2829850728482786063/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 309 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 
1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 Index: ql/src/test/results/clientpositive/groupby7_map_skew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby7_map_skew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby7_map_skew_withrollup.q.out (revision 0) @@ -0,0 +1,927 @@ +PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: 
default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL SRC) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 + Stage-6 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: _col1 + type: double + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-14-42_401_7904921071087091437/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-14-42_401_7904921071087091437/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, 
_col1 + File Output Operator + compressed: true + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-14-42_401_7904921071087091437/-mr-10006 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: true + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-7 + Stats-Aggr Operator + + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 
+PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-05_427_5220010122211558399/-mr-10000 +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-05_427_5220010122211558399/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 
+138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 
+404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-05_591_6687693680278108489/-mr-10000 +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-15-05_591_6687693680278108489/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 
+10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 
74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby10_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby10_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby10_withrollup.q.out (revision 0) @@ -0,0 +1,907 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, val1 INT, val2 INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key INT, val1 INT, val2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) 
+POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@INPUT +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv5.txt' INTO TABLE INPUT +PREHOOK: type: LOAD +PREHOOK: Output: default@input +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv5.txt' INTO TABLE INPUT +POSTHOOK: type: LOAD +POSTHOOK: Output: default@input +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + input + TableScan + alias: input + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: int + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: int + expr: substr(value, 5) + type: string + tag: -1 + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(KEY._col1) + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-09-38_574_7176829957510063479/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + 
type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-09-38_574_7176829957510063479/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator 
+ tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-09-38_574_7176829957510063479/-mr-10006 + Reduce Output Operator + key expressions: + expr: key + type: int + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: int + expr: substr(value, 5) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(KEY._col1) + expr: sum(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-09-38_574_7176829957510063479/-mr-10007 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: sum(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-09-38_574_7176829957510063479/-mr-10008 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: double + expr: _col2 + type: double + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-9 + Stats-Aggr Operator + + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct 
substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-12_718_4872046193986116133/-mr-10000 +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-12_718_4872046193986116133/-mr-10000 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION 
[(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +NULL 24 23 +27 1 1 +66 1 1 +86 1 1 +98 1 1 +128 1 1 +150 1 1 +165 1 1 +193 1 1 +213 3 2 +224 1 1 +238 3 3 +255 1 1 +265 1 1 +273 1 1 +278 1 1 +311 1 1 +369 1 1 +401 1 1 +409 1 1 +484 1 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-12_910_4327266843692491081/-mr-10000 +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-12_910_4327266843692491081/-mr-10000 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +NULL 5539 5326 +27 27 27 +66 66 66 +86 86 86 +98 98 98 +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 640 427 +224 224 224 +238 717 717 +255 255 255 +265 265 265 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT 
OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME INPUT))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL INPUT) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL INPUT) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL INPUT) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL INPUT) key)))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + input + TableScan + alias: input + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: int + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: int + expr: substr(value, 5) + type: string + tag: -1 + Select Operator + expressions: + expr: key + type: int + expr: value + type: string + outputColumnNames: key, value + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(KEY._col1) + expr: count(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + 
file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-13_062_6546847625513088922/-mr-10004 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(DISTINCT KEY._col1:0._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-13_062_6546847625513088922/-mr-10005 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: bigint + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + expr: count(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: bigint + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-13_062_6546847625513088922/-mr-10006 + Reduce Output Operator + key expressions: + expr: key + type: int + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: int + expr: substr(value, 5) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(KEY._col1) + expr: sum(DISTINCT KEY._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + expr: KEY._col1 + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-13_062_6546847625513088922/-mr-10007 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: double + expr: _col3 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: sum(DISTINCT KEY._col1:0._col0) + 
bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: partials + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-8 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-13_062_6546847625513088922/-mr-10008 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: final + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: double + expr: _col2 + type: double + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: int + expr: UDFToInteger(_col1) + type: int + expr: UDFToInteger(_col2) + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-9 + Stats-Aggr Operator + + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT 
INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: 
dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-44_473_6635442228635925393/-mr-10000 +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-44_473_6635442228635925393/-mr-10000 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +NULL 24 23 +27 1 1 +66 1 1 +86 1 1 +98 1 1 +128 1 1 +150 1 1 +165 1 1 +193 1 1 +213 3 2 +224 1 1 +238 3 3 +255 1 1 +265 1 1 +273 1 1 +278 1 1 +311 1 1 
+369 1 1 +401 1 1 +409 1 1 +484 1 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-44_647_6449358205847331193/-mr-10000 +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-25_11-10-44_647_6449358205847331193/-mr-10000 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +NULL 5539 5326 +27 27 27 +66 66 66 +86 86 86 +98 98 98 +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 640 427 +224 224 224 +238 717 717 +255 255 255 +265 265 265 +273 273 273 +278 278 278 +311 311 
311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 Index: ql/src/test/results/clientpositive/groupby2_map_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_map_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_map_withrollup.q.out (revision 0) @@ -0,0 +1,150 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))))) (TOK_ROLLUP_GROUPBY (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + expr: _col3 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY._col1:0._col0) + expr: sum(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + expr: concat(_col0, _col2) + type: string + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col0 + type: string + expr: UDFToInteger(_col1) + type: int + expr: _col2 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-33_268_5311916003721729158/-mr-10000 +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-22_17-51-33_268_5311916003721729158/-mr-10000 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +NULL 309 NULL +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 Index: ql/src/test/results/clientpositive/groupby1_noskew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_noskew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_noskew_withrollup.q.out (revision 0) @@ -0,0 +1,456 @@ +PREHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest_g1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. 
(TOK_TABLE_OR_COL src) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-36-54_565_2640409376360077955/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_g1 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g1 +POSTHOOK: Lineage: dest_g1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g1.* FROM dest_g1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-37-05_989_1701193209711572351/-mr-10000 +POSTHOOK: query: SELECT dest_g1.* FROM dest_g1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-24_12-37-05_989_1701193209711572351/-mr-10000 +POSTHOOK: Lineage: dest_g1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 
226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 
764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby1_map_skew_withrollup.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map_skew_withrollup.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_map_skew_withrollup.q.out (revision 0) @@ -0,0 +1,465 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM 
src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_ROLLUP_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: sum(substr(value, 5)) + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-00-29_805_4450480774762751/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value 
expressions: + expr: _col1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: UDFToInteger(_col0) + type: int + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-00-44_099_5240777264595856315/-mr-10000 
+POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-08-23_19-00-44_099_5240777264595856315/-mr-10000 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +NULL 130091.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 
+30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/queries/clientpositive/groupby1_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_withrollup.q (revision 0) +++ 
ql/src/test/queries/clientpositive/groupby1_withrollup.q (revision 0) @@ -0,0 +1,15 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + +CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE; + +set fs.default.name=invalidscheme:///; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +set fs.default.name=file:///; + +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +SELECT dest_g1.* FROM dest_g1; Index: ql/src/test/queries/clientpositive/groupby8_noskew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8_noskew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby8_noskew_withrollup.q (revision 0) @@ -0,0 +1,19 @@ +set hive.map.aggr=false; + +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby2_noskew_multi_distinct_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_noskew_multi_distinct_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_noskew_multi_distinct_withrollup.q (revision 0) @@ -0,0 +1,14 @@ 
+set hive.map.aggr=false; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest_g2.* FROM dest_g2; Index: ql/src/test/queries/clientpositive/groupby1_map_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_map_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby1_map_withrollup.q (revision 0) @@ -0,0 +1,12 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby_map_ppr_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_map_ppr_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_map_ppr_withrollup.q (revision 0) @@ -0,0 +1,20 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +FROM srcpart src +INSERT 
OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby1_map_nomap_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_map_nomap_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby1_map_nomap_withrollup.q (revision 0) @@ -0,0 +1,12 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set hive.groupby.mapaggr.checkinterval=20; + +CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby7_map_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby7_map_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby7_map_withrollup.q (revision 0) @@ -0,0 +1,21 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +SET hive.exec.compress.intermediate=true; +SET hive.exec.compress.output=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE 
DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby9_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby9_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby9_withrollup.q (revision 0) @@ -0,0 +1,67 @@ + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +set hive.multigroupby.singlemr=true; + +EXPLAIN 
+FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + + Index: ql/src/test/queries/clientpositive/groupby2_noskew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_noskew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_noskew_withrollup.q (revision 
0) @@ -0,0 +1,14 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest_g2.* FROM dest_g2; Index: ql/src/test/queries/clientpositive/groupby_neg_float_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_neg_float_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_neg_float_withrollup.q (revision 0) @@ -0,0 +1,12 @@ +FROM src +SELECT cast('-30.33' as DOUBLE) +GROUP BY cast('-30.33' as DOUBLE) +WITH ROLLUP +LIMIT 2; + + +FROM src +SELECT '-30.33' +GROUP BY '-30.33' +WITH ROLLUP +LIMIT 2; Index: ql/src/test/queries/clientpositive/groupby10_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby10_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby10_withrollup.q (revision 0) @@ -0,0 +1,34 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + +CREATE TABLE dest1(key INT, val1 INT, val2 INT); +CREATE TABLE dest2(key INT, val1 INT, val2 INT); + +CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/kv5.txt' INTO TABLE INPUT; + +EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct 
substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP; + +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP; + +SELECT * from dest1; +SELECT * from dest2; + +set hive.multigroupby.singlemr=true; + +EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP; + +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key WITH ROLLUP; + +SELECT * from dest1; +SELECT * from dest2; Index: ql/src/test/queries/clientpositive/groupby2_map_skew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_map_skew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_map_skew_withrollup.q (revision 0) @@ -0,0 +1,14 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=true; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) 
WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby2_limit_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_limit_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_limit_withrollup.q (revision 0) @@ -0,0 +1,7 @@ +set mapred.reduce.tasks=31; + +EXPLAIN +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key WITH ROLLUP LIMIT 5; + +SELECT src.key, sum(substr(src.value,5)) FROM src GROUP BY src.key WITH ROLLUP LIMIT 5; + Index: ql/src/test/queries/clientpositive/groupby7_noskew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby7_noskew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby7_noskew_withrollup.q (revision 0) @@ -0,0 +1,22 @@ +set hive.map.aggr=false; + +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +SET hive.exec.compress.intermediate=true; +SET hive.exec.compress.output=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby2_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_withrollup.q (revision 0) @@ -0,0 +1,14 @@ +set 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + +CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest_g2.* FROM dest_g2; Index: ql/src/test/queries/clientpositive/groupby7_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby7_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby7_withrollup.q (revision 0) @@ -0,0 +1,15 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +SET hive.exec.compress.intermediate=true; +SET hive.exec.compress.output=true; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby8_map_skew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8_map_skew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby8_map_skew_withrollup.q (revision 0) @@ -0,0 +1,19 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=true; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, 
value STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + Index: ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_ppr_multi_distinct_withrollup.q (revision 0) @@ -0,0 +1,19 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=false; + +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby_ppr_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_ppr_withrollup.q (revision 0) +++ 
ql/src/test/queries/clientpositive/groupby_ppr_withrollup.q (revision 0) @@ -0,0 +1,19 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=false; + +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby2_map_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_map_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_map_withrollup.q (revision 0) @@ -0,0 +1,14 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby2_map_multi_distinct_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_map_multi_distinct_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_map_multi_distinct_withrollup.q 
(revision 0) @@ -0,0 +1,14 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby1_noskew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_noskew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby1_noskew_withrollup.q (revision 0) @@ -0,0 +1,12 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +SELECT dest_g1.* FROM dest_g1; Index: ql/src/test/queries/clientpositive/groupby_map_ppr_multi_distinct_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_map_ppr_multi_distinct_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_map_ppr_multi_distinct_withrollup.q (revision 0) @@ -0,0 +1,20 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key STRING, c1 
INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE; + +EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby1_map_skew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_map_skew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby1_map_skew_withrollup.q (revision 0) @@ -0,0 +1,12 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=true; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby11_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby11_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby11_withrollup.q (revision 0) @@ -0,0 +1,27 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + + + + +CREATE TABLE dest1(key STRING, val1 INT, val2 INT) partitioned by (ds string); +CREATE TABLE dest2(key STRING, val1 INT, val2 INT) partitioned by (ds string); + +EXPLAIN +FROM src 
+INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value WITH ROLLUP +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) WITH ROLLUP; + +FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value WITH ROLLUP +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) WITH ROLLUP; + +SELECT * from dest1; +SELECT * from dest2; + + + Index: ql/src/test/queries/clientpositive/groupby1_limit_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_limit_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby1_limit_withrollup.q (revision 0) @@ -0,0 +1,10 @@ +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP LIMIT 5; + +FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key WITH ROLLUP LIMIT 5; + +SELECT dest1.* FROM dest1; Index: ql/src/test/queries/clientpositive/groupby8_map_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8_map_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby8_map_withrollup.q (revision 0) @@ -0,0 +1,19 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP 
BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + Index: ql/src/test/queries/clientpositive/groupby_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby_withrollup.q (revision 0) @@ -0,0 +1,206 @@ +CREATE TABLE DEST1(key INT, key2 INT, value STRING, value2 STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, key2 INT, value STRING, value2 STRING) STORED AS TEXTFILE; + +set hive.map.aggr=true; +set hive.groupby.skewindata=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS 
STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +set hive.map.aggr=true; +set hive.groupby.skewindata=false; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key 
AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +set hive.map.aggr=false; +set hive.groupby.skewindata=false; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), 
LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 + SELECT + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1), + sum(SUBSTR(SRC.value,5)), + COUNT(DISTINCT SUBSTR(SRC.value, LENGTH(SRC.value), 1)) + GROUP BY + SUBSTR(CAST(SRC.key AS STRING), 1, 1), + SUBSTR(CAST(SRC.key AS STRING), LENGTH(CAST(SRC.key AS STRING)), 1) + WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; \ No newline at end of file Index: ql/src/test/queries/clientpositive/groupby7_map_skew_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby7_map_skew_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby7_map_skew_withrollup.q (revision 0) @@ -0,0 +1,21 @@ +set hive.map.aggr=true; +set hive.groupby.skewindata=true; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS 
TEXTFILE; + +SET hive.exec.compress.intermediate=true; +SET hive.exec.compress.output=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby8_withrollup.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8_withrollup.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby8_withrollup.q (revision 0) @@ -0,0 +1,31 @@ +set hive.map.aggr=false; +set hive.groupby.skewindata=true; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + +set hive.multigroupby.singlemr=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY 
SRC.key WITH ROLLUP +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key WITH ROLLUP; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java (revision 1183502) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapper.java (working copy) @@ -27,4 +27,7 @@ abstract KeyWrapper copyKey(); abstract void copyKey(KeyWrapper oldWrapper); abstract Object[] getKeyArray(); + abstract int firstDifference(Object obj); + abstract void setNull(int index); + abstract KeyWrapper copySameKeys(); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (revision 1183502) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (working copy) @@ -49,12 +49,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObject; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import 
org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -106,6 +106,7 @@ protected transient KeyWrapper newKeys; protected transient AggregationBuffer[] aggregations; protected transient Object[][] aggregationsParametersLastInvoke; + protected transient AggregationBuffer[][] rollupAggregations; // Used by hash-based GroupBy: Mode = HASH, PARTIALS protected transient HashMap hashAggregations; @@ -326,6 +327,7 @@ aggregationsParametersLastInvoke = new Object[conf.getAggregators().size()][]; if (conf.getMode() != GroupByDesc.Mode.HASH || bucketGroup) { aggregations = newAggregations(); + initializeRollupAggregations(); hashAggr = false; } else { hashAggregations = new HashMap(256); @@ -399,6 +401,15 @@ estimateRowSize(); } + private void initializeRollupAggregations() throws HiveException { + if (conf.getWithRollup()) { + rollupAggregations = new AggregationBuffer[keyFields.length][]; + for (int i = 0; i < keyFields.length; i++) { + rollupAggregations[i] = newAggregations(); + } + } + } + private static final int javaObjectOverHead = 64; private static final int javaHashEntryOverHead = 64; private static final int javaSizePrimitiveType = 16; @@ -564,11 +575,11 @@ * @param newEntryForHashAggr only valid if it is a hash aggregation, whether * it is a new entry or not */ - protected void updateAggregations(AggregationBuffer[] aggs, Object row, + protected void updateAggregations(List aggsList, Object row, ObjectInspector rowInspector, boolean hashAggr, - boolean newEntryForHashAggr, Object[][] lastInvoke) throws HiveException { + List newEntryForHashAggr, Object[][] lastInvoke) throws HiveException { if (unionExprEval == null) { - for (int ai = 0; ai < aggs.length; ai++) { + for (int ai = 0; ai < aggsList.get(0).length; ai++) { // Calculate the parameters Object[] o = new Object[aggregationParameterFields[ai].length]; for (int pi = 0; pi < 
aggregationParameterFields[ai].length; pi++) { @@ -578,8 +589,10 @@ // Update the aggregations. if (aggregationIsDistinct[ai]) { if (hashAggr) { - if (newEntryForHashAggr) { - aggregationEvaluators[ai].aggregate(aggs[ai], o); + for (int i = 0; i < aggsList.size(); i++) { + if (newEntryForHashAggr.get(i)) { + aggregationEvaluators[ai].aggregate(aggsList.get(i)[ai], o); + } } } else { if (lastInvoke[ai] == null) { @@ -588,7 +601,9 @@ if (ObjectInspectorUtils.compare(o, aggregationParameterObjectInspectors[ai], lastInvoke[ai], aggregationParameterStandardObjectInspectors[ai]) != 0) { - aggregationEvaluators[ai].aggregate(aggs[ai], o); + for (AggregationBuffer[] aggs : aggsList) { + aggregationEvaluators[ai].aggregate(aggs[ai], o); + } for (int pi = 0; pi < o.length; pi++) { lastInvoke[ai][pi] = ObjectInspectorUtils.copyToStandardObject( o[pi], aggregationParameterObjectInspectors[ai][pi], @@ -597,7 +612,9 @@ } } } else { - aggregationEvaluators[ai].aggregate(aggs[ai], o); + for (AggregationBuffer[] aggs : aggsList) { + aggregationEvaluators[ai].aggregate(aggs[ai], o); + } } } return; @@ -615,7 +632,9 @@ for (int pi = 0; pi < aggregationParameterFields[pos].length; pi++) { o[pi] = aggregationParameterFields[pos][pi].evaluate(row); } - aggregationEvaluators[pos].aggregate(aggs[pos], o); + for (AggregationBuffer[] aggs : aggsList) { + aggregationEvaluators[pos].aggregate(aggs[pos], o); + } } } // there may be multi distinct clauses for one column @@ -628,8 +647,10 @@ } if (hashAggr) { - if (newEntryForHashAggr) { - aggregationEvaluators[i].aggregate(aggs[i], o); + for (int j = 0; j < aggsList.size(); j++) { + if (newEntryForHashAggr.get(j)) { + aggregationEvaluators[i].aggregate(aggsList.get(j)[i], o); + } } } else { if (lastInvoke[i] == null) { @@ -639,7 +660,9 @@ aggregationParameterObjectInspectors[i], lastInvoke[i], aggregationParameterStandardObjectInspectors[i]) != 0) { - aggregationEvaluators[i].aggregate(aggs[i], o); + for (AggregationBuffer[] aggs : aggsList) { 
+ aggregationEvaluators[i].aggregate(aggs[i], o); + } for (int pi = 0; pi < o.length; pi++) { lastInvoke[i][pi] = ObjectInspectorUtils.copyToStandardObject( o[pi], aggregationParameterObjectInspectors[i][pi], @@ -658,18 +681,22 @@ for (int pi = 0; pi < aggregationParameterFields[pos].length; pi++) { o[pi] = aggregationParameterFields[pos][pi].evaluate(row); } - aggregationEvaluators[pos].aggregate(aggs[pos], o); + for (AggregationBuffer[] aggs : aggsList) { + aggregationEvaluators[pos].aggregate(aggs[pos], o); + } } } } else { - for (int ai = 0; ai < aggs.length; ai++) { + for (int ai = 0; ai < aggsList.get(0).length; ai++) { // there is no distinct aggregation, // update all aggregations Object[] o = new Object[aggregationParameterFields[ai].length]; for (int pi = 0; pi < aggregationParameterFields[ai].length; pi++) { o[pi] = aggregationParameterFields[ai][pi].evaluate(row); } - aggregationEvaluators[ai].aggregate(aggs[ai], o); + for (AggregationBuffer[] aggs : aggsList) { + aggregationEvaluators[ai].aggregate(aggs[ai], o); + } } } } @@ -697,12 +724,13 @@ if (numRowsInput == numRowsCompareHashAggr) { numRowsCompareHashAggr += groupbyMapAggrInterval; // map-side aggregation should reduce the entries by at-least half - if (numRowsHashTbl > numRowsInput * minReductionHashAggr) { + if (numRowsHashTbl > numRowsInput * minReductionHashAggr && !conf.getWithRollup()) { LOG.warn("Disable Hash Aggr: #hash table = " + numRowsHashTbl + " #total = " + numRowsInput + " reduction = " + 1.0 * (numRowsHashTbl / numRowsInput) + " minReduction = " + minReductionHashAggr); flush(true); + initializeRollupAggregations(); hashAggr = false; } else { LOG.trace("Hash Aggr Enabled: #hash table = " + numRowsHashTbl @@ -740,31 +768,51 @@ private void processHashAggr(Object row, ObjectInspector rowInspector, KeyWrapper newKeys) throws HiveException { - // Prepare aggs for updating - AggregationBuffer[] aggs = null; - boolean newEntryForHashAggr = false; + int i = conf.getNonDistinctKeys() - 1; 
+ List aggsList = new ArrayList(); + List newEntryForHashAggrList = new ArrayList(); + List keys = new ArrayList(); - // hash-based aggregations - aggs = hashAggregations.get(newKeys); - if (aggs == null) { - KeyWrapper newKeyProber = newKeys.copyKey(); - aggs = newAggregations(); - hashAggregations.put(newKeyProber, aggs); - newEntryForHashAggr = true; - numRowsHashTbl++; // new entry in the hash table - } + do { + // Prepare aggs for updating + AggregationBuffer[] aggs = null; + boolean newEntryForHashAggr = false; - // If the grouping key and the reduction key are different, a set of - // grouping keys for the current reduction key are maintained in - // keysCurrentGroup - // Peek into the set to find out if a new grouping key is seen for the given - // reduction key - if (groupKeyIsNotReduceKey) { - newEntryForHashAggr = keysCurrentGroup.add(newKeys.copyKey()); - } + // hash-based aggregations + aggs = hashAggregations.get(newKeys); + if (aggs == null) { + KeyWrapper newKeyProber = newKeys.copyKey(); + aggs = newAggregations(); + hashAggregations.put(newKeyProber, aggs); + newEntryForHashAggr = true; + numRowsHashTbl++; // new entry in the hash table + } + // If the grouping key and the reduction key are different, a set of + // grouping keys for the current reduction key are maintained in + // keysCurrentGroup + // Peek into the set to find out if a new grouping key is seen for the given + // reduction key + if (groupKeyIsNotReduceKey) { + newEntryForHashAggr = keysCurrentGroup.add(newKeys.copyKey()); + } + + aggsList.add(aggs); + newEntryForHashAggrList.add(newEntryForHashAggr); + keys.add(newKeys); + + if (conf.getWithRollup() && i >= 0) { + newKeys = newKeys.copySameKeys(); + newKeys.setNull(i); + newKeys.setHashKey(); + } + + i--; + } while (i >= -1 && conf.getWithRollup()); + + // Update the aggs - updateAggregations(aggs, row, rowInspector, true, newEntryForHashAggr, null); + updateAggregations(aggsList, row, rowInspector, true, newEntryForHashAggrList, 
null); // We can only flush after the updateAggregations is done, or the // potentially new entry "aggs" @@ -774,28 +822,39 @@ // flushed. // If the grouping key is not the same as reduction key, flushing can only // happen at boundaries - if ((!groupKeyIsNotReduceKey || firstRowInGroup) - && shouldBeFlushed(newKeys)) { - flush(false); + for (KeyWrapper key : keys) { + if ((!groupKeyIsNotReduceKey || firstRowInGroup) + && shouldBeFlushed(key)) { + flush(false); + } } } // Non-hash aggregation + // NOTE: DO NOT use non-hash aggregation WITH ROLLUP if any of the aggregations are DISTINCT private void processAggr(Object row, ObjectInspector rowInspector, KeyWrapper newKeys) throws HiveException { // Prepare aggs for updating - AggregationBuffer[] aggs = null; Object[][] lastInvoke = null; + int firstDifferentKey = -1; //boolean keysAreEqual = (currentKeys != null && newKeys != null)? // newKeyStructEqualComparer.areEqual(currentKeys, newKeys) : false; boolean keysAreEqual = (currentKeys != null && newKeys != null)? 
newKeys.equals(currentKeys) : false; - // Forward the current keys if needed for sort-based aggregation if (currentKeys != null && !keysAreEqual) { forward(currentKeys.getKeyArray(), aggregations); + + if (conf.getWithRollup()) { + firstDifferentKey = newKeys.firstDifference(currentKeys); + for (int i = keyFields.length - 1; i >= firstDifferentKey; i--) { + currentKeys.setNull(i); + forward(currentKeys.getKeyArray(), rollupAggregations[i]); + } + } + countAfterReport = 0; } @@ -810,18 +869,31 @@ // Reset the aggregations resetAggregations(aggregations); + if (firstDifferentKey >= 0) { + for (int i = keyFields.length - 1; i >= firstDifferentKey; i--) { + resetAggregations(rollupAggregations[i]); + } + } + // clear parameters in last-invoke for (int i = 0; i < aggregationsParametersLastInvoke.length; i++) { aggregationsParametersLastInvoke[i] = null; } } - aggs = aggregations; + List aggsList = new ArrayList(); + aggsList.add(aggregations); + if (conf.getWithRollup()) { + for (int i = keyFields.length - 1; i >= 0; i--) { + aggsList.add(rollupAggregations[i]); + } + } + lastInvoke = aggregationsParametersLastInvoke; // Update the aggs - updateAggregations(aggs, row, rowInspector, false, false, lastInvoke); + updateAggregations(aggsList, row, rowInspector, false, new ArrayList(), lastInvoke); } /** @@ -1011,6 +1083,13 @@ if (currentKeys != null) { forward(currentKeys.getKeyArray(), aggregations); } + + if (conf.getWithRollup()) { + for (int i = keyFields.length - 1; i >= 0; i--) { + currentKeys.setNull(i); + forward(currentKeys.getKeyArray(), rollupAggregations[i]); + } + } currentKeys = null; } else { // The GroupByOperator is not initialized, which means there is no @@ -1058,4 +1137,4 @@ public OperatorType getType() { return OperatorType.GROUPBY; } -} +} \ No newline at end of file Index: ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java =================================================================== --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java (revision 1183502) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/KeyWrapperFactory.java (working copy) @@ -102,6 +102,12 @@ } @Override + public int firstDifference(Object obj) { + Object[] copied_in_hashmap = ((ListKeyWrapper) obj).keys; + return equalComparer.firstDifference(copied_in_hashmap, keys); + } + + @Override public void setHashKey() { hashcode = Arrays.hashCode(keys); } @@ -135,6 +141,21 @@ return keys; } + @Override + public void setNull(int index) { + assert(index < keys.length); + keys[index] = null; + } + + @Override + public KeyWrapper copySameKeys() { + Object[] newKeyArray = new Object[keys.length]; + for (int i = 0; i < keys.length; i++) { + newKeyArray[i] = keys[i]; + } + return new ListKeyWrapper(hashcode, newKeyArray, false); + } + private Object[] deepCopyElements(Object[] keys, ObjectInspector[] keyObjectInspectors, ObjectInspectorCopyOption copyOption) { @@ -200,6 +221,15 @@ } @Override + public int firstDifference(Object other) { + if (equals(other)) { + return 1; + } else { + return 0; + } + } + + @Override public void setHashKey() { if (key == null) { hashcode = 0; @@ -234,5 +264,16 @@ singleEleArray[0] = key; return singleEleArray; } + + @Override + public void setNull(int index) { + assert(index == 0); + key = null; + } + + @Override + public KeyWrapper copySameKeys() { + return new TextKeyWrapper(hashcode, key, false); + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java (revision 1183502) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java (working copy) @@ -55,6 +55,9 @@ private float groupByMemoryUsage; private float memoryThreshold; + private boolean withRollup; + private int nonDistinctKeys; + public GroupByDesc() { } @@ -63,9 +66,20 @@ final java.util.ArrayList outputColumnNames, final 
java.util.ArrayList keys, final java.util.ArrayList aggregators, + final boolean groupKeyNotReductionKey,float groupByMemoryUsage, float memoryThreshold, + final boolean withRollup, final int nonDistinctKeys) { + this(mode, outputColumnNames, keys, aggregators, groupKeyNotReductionKey, + false, groupByMemoryUsage, memoryThreshold, withRollup, nonDistinctKeys); + } + + public GroupByDesc( + final Mode mode, + final java.util.ArrayList outputColumnNames, + final java.util.ArrayList keys, + final java.util.ArrayList aggregators, final boolean groupKeyNotReductionKey,float groupByMemoryUsage, float memoryThreshold) { this(mode, outputColumnNames, keys, aggregators, groupKeyNotReductionKey, - false, groupByMemoryUsage, memoryThreshold); + false, groupByMemoryUsage, memoryThreshold, false, keys.size()); } public GroupByDesc( @@ -73,7 +87,8 @@ final java.util.ArrayList outputColumnNames, final java.util.ArrayList keys, final java.util.ArrayList aggregators, - final boolean groupKeyNotReductionKey, final boolean bucketGroup,float groupByMemoryUsage, float memoryThreshold) { + final boolean groupKeyNotReductionKey, final boolean bucketGroup,float groupByMemoryUsage, float memoryThreshold, + final boolean withRollup, final int nonDistinctKeys) { this.mode = mode; this.outputColumnNames = outputColumnNames; this.keys = keys; @@ -82,6 +97,8 @@ this.bucketGroup = bucketGroup; this.groupByMemoryUsage = groupByMemoryUsage; this.memoryThreshold = memoryThreshold; + this.withRollup = withRollup; + this.nonDistinctKeys = nonDistinctKeys; } public Mode getMode() { @@ -175,4 +192,20 @@ public void setBucketGroup(boolean dataSorted) { bucketGroup = dataSorted; } + + public boolean getWithRollup() { + return withRollup; + } + + public void setWithRollup(boolean withRollup) { + this.withRollup = withRollup; + } + + public int getNonDistinctKeys() { + return nonDistinctKeys; + } + + public void setNonDistinctKeys(int nonDistinctKeys) { + this.nonDistinctKeys = nonDistinctKeys; + } } 
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 1183502) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy) @@ -254,6 +254,7 @@ TOK_TABSRC; TOK_RESTRICT; TOK_CASCADE; +TOK_ROLLUP_GROUPBY; } @@ -1725,6 +1726,8 @@ KW_GROUP KW_BY groupByExpression ( COMMA groupByExpression )* + (with=KW_WITH KW_ROLLUP)? + -> {with != null}? ^(TOK_ROLLUP_GROUPBY groupByExpression+) -> ^(TOK_GROUPBY groupByExpression+) ; @@ -2318,6 +2321,7 @@ KW_UPDATE: 'UPDATE'; KW_RESTRICT: 'RESTRICT'; KW_CASCADE: 'CASCADE'; +KW_ROLLUP: 'ROLLUP'; // Operators Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1183502) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -816,6 +816,7 @@ break; case HiveParser.TOK_GROUPBY: + case HiveParser.TOK_ROLLUP_GROUPBY: // Get the groupby aliases - these are aliased to the entries in the // select list queryProperties.setHasGroupBy(true); @@ -1957,6 +1958,14 @@ } } + static boolean isGroupByWithRollup(QBParseInfo parseInfo, String dest) { + ASTNode groupBy = parseInfo.getGroupByForClause(dest); + if (groupBy != null && groupBy.getToken().getType() == HiveParser.TOK_ROLLUP_GROUPBY) { + return true; + } + return false; + } + private static String[] getColAlias(ASTNode selExpr, String defaultName, RowResolver inputRR, boolean includeFuncName, int colNum) { String colAlias = null; @@ -2418,7 +2427,7 @@ @SuppressWarnings("nls") private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, - Map genericUDAFEvaluators) + Map genericUDAFEvaluators, boolean withRollup) throws SemanticException { RowResolver groupByInputRowResolver = 
opParseCtx .get(reduceSinkOperatorInfo).getRowResolver(); @@ -2521,7 +2530,7 @@ float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()), + false,groupByMemoryUsage,memoryThreshold, withRollup, groupByKeys.size()), new RowSchema(groupByOutputRowResolver.getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; @@ -2544,7 +2553,7 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, String dest, Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, Map genericUDAFEvaluators, - boolean distPartAgg) throws SemanticException { + boolean distPartAgg, boolean withRollup) throws SemanticException { ArrayList outputColumnNames = new ArrayList(); RowResolver groupByInputRowResolver = opParseCtx .get(reduceSinkOperatorInfo).getRowResolver(); @@ -2670,7 +2679,7 @@ float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - distPartAgg,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver + distPartAgg,groupByMemoryUsage,memoryThreshold, withRollup, groupByKeys.size()), new RowSchema(groupByOutputRowResolver .getColumnInfos()), reduceSinkOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); @@ -2678,6 +2687,126 @@ } /** + * Generate the GroupByOperator for the Query Block + * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child + * of the inputOperatorInfo. 
+ * + * @param mode + * The mode of the aggregation (HASH) + * @param genericUDAFEvaluators + * If not null, this function will store the mapping from Aggregation + * StringTree to the genericUDAFEvaluator in this parameter, so it + * can be used in the next-stage GroupBy aggregations. + * @return the new GroupByOperator + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanGroupByOperator2(QBParseInfo parseInfo, String dest, + Operator reduceSinkOperatorInfo, GroupByDesc.Mode mode, + Map genericUDAFEvaluators, boolean withRollup) + throws SemanticException { + + ArrayList outputColumnNames = new ArrayList(); + RowResolver groupByInputRowResolver = opParseCtx + .get(reduceSinkOperatorInfo).getRowResolver(); + RowResolver groupByOutputRowResolver = new RowResolver(); + groupByOutputRowResolver.setIsExprResolver(true); + ArrayList groupByKeys = new ArrayList(); + ArrayList aggregations = new ArrayList(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + Map colExprMap = new HashMap(); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + ExprNodeDesc grpByExprNode = genExprNodeDesc(grpbyExpr, + groupByInputRowResolver); + + groupByKeys.add(grpByExprNode); + String field = getColumnInternalName(i); + outputColumnNames.add(field); + groupByOutputRowResolver.putExpression(grpbyExpr, + new ColumnInfo(field, grpByExprNode.getTypeInfo(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + + int nonDistinctKeys = groupByKeys.size(); + + // If there is a distinctFuncExp, add all parameters to the reduceKeys. 
+ if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) { + List list = parseInfo.getDistinctFuncExprsForClause(dest); + int numDistn = 0; + for(ASTNode value: list) { + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + if (groupByOutputRowResolver.getExpression(parameter) == null) { + ExprNodeDesc distExprNode = genExprNodeDesc(parameter, + groupByInputRowResolver); + groupByKeys.add(distExprNode); + numDistn++; + String field = getColumnInternalName(grpByExprs.size() + numDistn - + 1); + outputColumnNames.add(field); + groupByOutputRowResolver.putExpression(parameter, new ColumnInfo( + field, distExprNode.getTypeInfo(), "", false)); + colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); + } + } + } + } + + // For each aggregation + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); + assert (aggregationTrees != null); + + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + String aggName = unescapeIdentifier(value.getChild(0).getText()); + ArrayList aggParameters = new ArrayList(); + new ArrayList>(); + // 0 is the function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode paraExpr = (ASTNode) value.getChild(i); + ExprNodeDesc paraExprNode = genExprNodeDesc(paraExpr, + groupByInputRowResolver); + + aggParameters.add(paraExprNode); + } + + boolean isDistinct = value.getType() == HiveParser.TOK_FUNCTIONDI; + boolean isAllColumns = value.getType() == HiveParser.TOK_FUNCTIONSTAR; + Mode amode = groupByDescModeToUDAFMode(mode, isDistinct); + + GenericUDAFEvaluator genericUDAFEvaluator = getGenericUDAFEvaluator( + aggName, aggParameters, value, isDistinct, isAllColumns); + assert (genericUDAFEvaluator != null); + GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, + aggParameters); + aggregations.add(new AggregationDesc(aggName.toLowerCase(), + udaf.genericUDAFEvaluator, 
udaf.convertedParameters, isDistinct, + amode)); + String field = getColumnInternalName(groupByKeys.size() + + aggregations.size() - 1); + outputColumnNames.add(field); + groupByOutputRowResolver.putExpression(value, new ColumnInfo( + field, udaf.returnType, "", false)); + // Save the evaluator so that it can be used by the next-stage + // GroupByOperators + if (genericUDAFEvaluators != null) { + genericUDAFEvaluators.put(entry.getKey(), genericUDAFEvaluator); + } + } + float groupByMemoryUsage = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY); + float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); + Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( + new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, + false,groupByMemoryUsage,memoryThreshold, withRollup, nonDistinctKeys), new RowSchema(groupByOutputRowResolver + .getColumnInfos()), reduceSinkOperatorInfo), + groupByOutputRowResolver); + op.setColumnExprMap(colExprMap); + return op; + } + + /** * Generate the map-side GroupByOperator for the Query Block * (qb.getParseInfo().getXXX(dest)). The new GroupByOperator will be a child * of the inputOperatorInfo. @@ -2693,7 +2822,7 @@ @SuppressWarnings("nls") private Operator genGroupByPlanMapGroupByOperator(QB qb, String dest, Operator inputOperatorInfo, GroupByDesc.Mode mode, - Map genericUDAFEvaluators) + Map genericUDAFEvaluators, boolean withRollup) throws SemanticException { RowResolver groupByInputRowResolver = opParseCtx.get(inputOperatorInfo) @@ -2719,6 +2848,8 @@ colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } + int nonDistinctKeys = groupByKeys.size(); + // If there is a distinctFuncExp, add all parameters to the reduceKeys. 
if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) { List list = parseInfo.getDistinctFuncExprsForClause(dest); @@ -2789,7 +2920,7 @@ float memoryThreshold = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVEMAPAGGRMEMORYTHRESHOLD); Operator op = putOpInsertMap(OperatorFactory.getAndMakeChild( new GroupByDesc(mode, outputColumnNames, groupByKeys, aggregations, - false,groupByMemoryUsage,memoryThreshold), new RowSchema(groupByOutputRowResolver.getColumnInfos()), + false,groupByMemoryUsage,memoryThreshold, withRollup, nonDistinctKeys), new RowSchema(groupByOutputRowResolver.getColumnInfos()), inputOperatorInfo), groupByOutputRowResolver); op.setColumnExprMap(colExprMap); return op; @@ -2800,6 +2931,133 @@ * (qb.getPartInfo().getXXX(dest)). The new ReduceSinkOperator will be a child * of inputOperatorInfo. * + * It will put all Group By keys and the distinct fields (if any) in the + * map-reduce sort key, and all other fields in the map-reduce value. + * + * @param numPartitionFields + * the number of fields for map-reduce partitioning. This is usually + * the number of fields in the Group By keys. + * @return the new ReduceSinkOperator. 
+ * @throws SemanticException + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanReduceSinkOperatorWithRollup(QB qb, String dest, + Operator inputOperatorInfo, int numPartitionFields, int numReducers, + boolean mapAggrDone) throws SemanticException { + + RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo) + .getRowResolver(); + QBParseInfo parseInfo = qb.getParseInfo(); + RowResolver reduceSinkOutputRowResolver = new RowResolver(); + reduceSinkOutputRowResolver.setIsExprResolver(true); + Map colExprMap = new HashMap(); + ArrayList reduceKeys = new ArrayList(); + // Pre-compute group-by keys and store in reduceKeys + + List outputKeyColumnNames = new ArrayList(); + List outputValueColumnNames = new ArrayList(); + List grpByExprs = getGroupByForClause(parseInfo, dest); + for (int i = 0; i < grpByExprs.size(); ++i) { + ASTNode grpbyExpr = grpByExprs.get(i); + ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr, + reduceSinkInputRowResolver); + reduceKeys.add(inputExpr); + if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) { + outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); + String field = Utilities.ReduceField.KEY.toString() + "." + + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( + reduceKeys.size() - 1).getTypeInfo(), null, false); + reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo); + colExprMap.put(colInfo.getInternalName(), inputExpr); + } else { + throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY + .getMsg(grpbyExpr)); + } + } + + // If there is a distinctFuncExp, add all parameters to the reduceKeys. 
+ if (!parseInfo.getDistinctFuncExprsForClause(dest).isEmpty()) { + List distFuncs = parseInfo.getDistinctFuncExprsForClause(dest); + for (int i = 0; i < distFuncs.size(); i++) { + ASTNode value = distFuncs.get(i); + // 0 is function name + for (int j = 1; j < value.getChildCount(); j++) { + ASTNode parameter = (ASTNode) value.getChild(j); + if (reduceSinkOutputRowResolver.getExpression(parameter) == null) { + ExprNodeDesc inputExpr = genExprNodeDesc(parameter, + reduceSinkInputRowResolver); + reduceKeys.add(inputExpr); + outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1)); + String field = Utilities.ReduceField.KEY.toString() + "." + + getColumnInternalName(reduceKeys.size() - 1); + ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get( + reduceKeys.size() - 1).getTypeInfo(), null, false); + reduceSinkOutputRowResolver.putExpression(parameter, colInfo); + colExprMap.put(colInfo.getInternalName(), inputExpr); + } + } + } + } + + ArrayList reduceValues = new ArrayList(); + HashMap aggregationTrees = parseInfo + .getAggregationExprsForClause(dest); + + if (!mapAggrDone) { + // Put parameters to aggregations in reduceValues + for (Map.Entry entry : aggregationTrees.entrySet()) { + ASTNode value = entry.getValue(); + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + if (reduceSinkOutputRowResolver.getExpression(parameter) == null) { + reduceValues.add(genExprNodeDesc(parameter, + reduceSinkInputRowResolver)); + outputValueColumnNames + .add(getColumnInternalName(reduceValues.size() - 1)); + String field = Utilities.ReduceField.VALUE.toString() + "." 
+ + getColumnInternalName(reduceValues.size() - 1); + reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field, + reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null, + false)); + } + } + } + } else { + // Put partial aggregation results in reduceValues + int inputField = reduceKeys.size(); + + for (Map.Entry entry : aggregationTrees.entrySet()) { + + TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get( + inputField).getType(); + reduceValues.add(new ExprNodeColumnDesc(type, + getColumnInternalName(inputField), "", false)); + inputField++; + outputValueColumnNames.add(getColumnInternalName(reduceValues.size() - 1)); + String field = Utilities.ReduceField.VALUE.toString() + "." + + getColumnInternalName(reduceValues.size() - 1); + reduceSinkOutputRowResolver.putExpression(entry.getValue(), + new ColumnInfo(field, type, null, false)); + } + } + + ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap( + OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, + reduceKeys.size(), reduceValues, new ArrayList>(), + outputKeyColumnNames, outputValueColumnNames, true, -1, numPartitionFields, + numReducers), new RowSchema(reduceSinkOutputRowResolver + .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver); + rsOp.setColumnExprMap(colExprMap); + return rsOp; + } + + /** + * Generate the ReduceSinkOperator for the Group By Query Block + * (qb.getPartInfo().getXXX(dest)). The new ReduceSinkOperator will be a child + * of inputOperatorInfo. + * * It will put all Group By keys and the distinct field (if any) in the * map-reduce sort key, and all other fields in the map-reduce value. * @@ -3146,7 +3404,7 @@ // ////// 2. 
Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, - dest, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null); + dest, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null, false); return groupByOperatorInfo; } @@ -3200,7 +3458,7 @@ // ////// 2. Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator1(parseInfo, - dest, input, GroupByDesc.Mode.HASH, genericUDAFEvaluators, true); + dest, input, GroupByDesc.Mode.HASH, genericUDAFEvaluators, true, false); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); @@ -3246,7 +3504,7 @@ // ////// Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, - dest, input, GroupByDesc.Mode.COMPLETE, null); + dest, input, GroupByDesc.Mode.COMPLETE, null, false); return groupByOperatorInfo; } @@ -3311,10 +3569,10 @@ // ////// 2. Generate GroupbyOperator Map genericUDAFEvaluators = - new LinkedHashMap(); + new LinkedHashMap(); GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator( parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIAL1, - genericUDAFEvaluators); + genericUDAFEvaluators, isGroupByWithRollup(parseInfo, dest)); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); @@ -3334,6 +3592,159 @@ return groupByOperatorInfo2; } + /** + * Generate a Group-By plan using 3 map-reduce jobs. However, only 2 + * group-by plans are generated if the query involves no grouping key and no + * distincts. In that case, the plan is same as generated by + * genGroupByPlanMapAggr2MRWithRollup. Otherwise, the following plan is generated: First + * perform a reduce side partial aggregation (to reduce the amount of data). 
Then + * spray by the grouping key and distinct key (or a random number, if no + * distinct is present) in hope of getting a uniform distribution, and compute + * partial aggregates grouped by the reduction key (grouping key + distinct + * key). Evaluate partial aggregates first, and spray by the grouping key to + * compute actual aggregates in the second phase. The agggregation evaluation + * functions are as follows: Reducer: iterate/terminatePartial (mode = HASH) + * + * Partitioning Key: random() if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Reducer: iterate/terminatePartial if DISTINCT merge/terminatePartial if NO + * DISTINCT (mode = MERGEPARTIAL) + * + * STAGE 2 + * + * Partitioining Key: grouping key + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Reducer: merge/terminate (mode = FINAL) + */ + @SuppressWarnings("nls") + private Operator genGroupByPlanMapAggr3MRWithRollup(String dest, QB qb, + Operator inputOperatorInfo) throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperatorWithRollup(qb, + dest, inputOperatorInfo, (parseInfo + .getDistinctFuncExprsForClause(dest).isEmpty() ? 
-1 + : Integer.MAX_VALUE), -1, false); + + // ////// Generate GroupbyOperator for a reduce-side partial aggregation + Map genericUDAFEvaluators = + new LinkedHashMap(); + GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator2( + parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.HASH, + genericUDAFEvaluators, true); + + groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( + inputOperatorInfo).getRowResolver().getTableNames()); + // Optimize the scenario when there are no grouping keys and no distinct - 2 + // map-reduce jobs are not needed + // For eg: select count(1) from T where t.ds = .... + if (!optimizeMapAggrGroupBy(dest, qb)) { + + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator(qb, + dest, groupByOperatorInfo, (parseInfo + .getDistinctFuncExprsForClause(dest).isEmpty() ? -1 + : Integer.MAX_VALUE), -1, true); + + // ////// Generate GroupbyOperator for a partial aggregation + Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, + dest, reduceSinkOperatorInfo2, GroupByDesc.Mode.PARTIALS, + genericUDAFEvaluators, false, false); + + int numReducers = -1; + List grpByExprs = getGroupByForClause(parseInfo, dest); + if (grpByExprs.isEmpty()) { + numReducers = 1; + } + + // ////// Generate ReduceSinkOperator2 + Operator reduceSinkOperatorInfo3 = genGroupByPlanReduceSinkOperator2MR( + parseInfo, dest, groupByOperatorInfo2, grpByExprs.size(), numReducers); + + // ////// Generate GroupbyOperator3 + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, + reduceSinkOperatorInfo3, GroupByDesc.Mode.FINAL, + genericUDAFEvaluators); + } else { + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator(qb, + dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest) + .size(), 1, true); + + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, + reduceSinkOperatorInfo2, 
GroupByDesc.Mode.FINAL, genericUDAFEvaluators); + } + } + + /** + * Generate a Group-By plan using 2 map-reduce jobs (3 operators will be + * inserted): + * + * + * @param dest + * @param qb + * @param input + * @return + * @throws SemanticException + * + * Generate a Group-By plan using 2 map-reduce jobs. First perform a reduce-side + * partial aggregation (to reduce the amount of data). Then + * spray by the group by key, and sort by the distinct key (if any), and + * compute aggregates based on actual aggregates + * + * The aggregation evaluation functions are as follows: Reducer 1: + * iterate/terminatePartial (mode = HASH) + * + * Partitioning Key: grouping key + * + * Sorting Key: grouping key if no DISTINCT grouping + distinct key if + * DISTINCT + * + * Reducer2: iterate/terminate if DISTINCT merge/terminate if NO DISTINCT (mode + * = MERGEPARTIAL) + */ + @SuppressWarnings("nls") + private Operator genGroupByPlan2MRWithRollup(String dest, QB qb, Operator input) + throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + int numReducers = -1; + List grpByExprs = getGroupByForClause(parseInfo, dest); + if (grpByExprs.isEmpty()) { + numReducers = 1; + } + + // ////// 1. Generate ReduceSinkOperator + Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperatorWithRollup(qb, + dest, input, grpByExprs.size(), numReducers, false); + + // ////// 2. 
Generate GroupbyOperator + Map genericUDAFEvaluators = + new LinkedHashMap(); + + GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanGroupByOperator2( + parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.HASH, + genericUDAFEvaluators, true); + + groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( + reduceSinkOperatorInfo).getRowResolver().getTableNames()); + + Operator reduceSinkOperatorInfo2 = genGroupByPlanReduceSinkOperator(qb, + dest, groupByOperatorInfo, grpByExprs.size(), numReducers, true); + return genGroupByPlanGroupByOperator1(parseInfo, dest, + reduceSinkOperatorInfo2, GroupByDesc.Mode.MERGEPARTIAL, genericUDAFEvaluators, false, false); + } + private boolean optimizeMapAggrGroupBy(String dest, QB qb) { List grpByExprs = getGroupByForClause(qb.getParseInfo(), dest); if ((grpByExprs != null) && !grpByExprs.isEmpty()) { @@ -3376,7 +3787,7 @@ new LinkedHashMap(); GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator( qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH, - genericUDAFEvaluators); + genericUDAFEvaluators, isGroupByWithRollup(parseInfo, dest)); groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( inputOperatorInfo).getRowResolver().getTableNames()); @@ -3402,7 +3813,7 @@ // on the reducer. 
return genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.MERGEPARTIAL, - genericUDAFEvaluators, false); + genericUDAFEvaluators, false, false); } /** @@ -3447,7 +3858,7 @@ new LinkedHashMap(); GroupByOperator groupByOperatorInfo = (GroupByOperator) genGroupByPlanMapGroupByOperator( qb, dest, inputOperatorInfo, GroupByDesc.Mode.HASH, - genericUDAFEvaluators); + genericUDAFEvaluators, isGroupByWithRollup(parseInfo, dest)); groupOpToInputTables.put(groupByOperatorInfo, opParseCtx.get( inputOperatorInfo).getRowResolver().getTableNames()); @@ -3465,7 +3876,7 @@ // ////// Generate GroupbyOperator for a partial aggregation Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, GroupByDesc.Mode.PARTIALS, - genericUDAFEvaluators, false); + genericUDAFEvaluators, false, false); int numReducers = -1; List grpByExprs = getGroupByForClause(parseInfo, dest); @@ -3479,8 +3890,8 @@ // ////// Generate GroupbyOperator3 return genGroupByPlanGroupByOperator2MR(parseInfo, dest, - reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, - genericUDAFEvaluators); + reduceSinkOperatorInfo2, GroupByDesc.Mode.FINAL, + genericUDAFEvaluators); } else { // ////// Generate ReduceSink Operator Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator(qb, @@ -5619,6 +6030,21 @@ return false; } + // see if there are any group bys with rollup + private boolean withRollupExists(QB qb) { + QBParseInfo qbp = qb.getParseInfo(); + + TreeSet ks = new TreeSet(); + ks.addAll(qbp.getClauseNames()); + + for (String dest : ks) { + if (isGroupByWithRollup(qb.getParseInfo(), dest)) { + return true; + } + } + return false; + } + // return the common group by key set. // Null if there are no common group by keys. 
private List getCommonGroupbyKeys(QB qb, Operator input) { @@ -5795,13 +6221,14 @@ List commonDistinctExprs = getCommonDistinctExprs(qb, input); List commonGbyKeys = getCommonGroupbyKeys(qb, input); LOG.warn("Common Gby keys:" + commonGbyKeys); - boolean optimizeMultiGroupBy = commonDistinctExprs != null; + boolean optimizeMultiGroupBy = commonDistinctExprs != null && !withRollupExists(qb); // Generate single MR job for multigroupby query if query has non-null common // groupby key set and there are zero or one common distinct expression. boolean singlemrMultiGroupBy = conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEMR) && commonGbyKeys != null && !commonGbyKeys.isEmpty() && - (!distinctExprsExists(qb) || commonDistinctExprs != null); + (!distinctExprsExists(qb) || commonDistinctExprs != null) && + !withRollupExists(qb); Operator curr = input; @@ -5880,9 +6307,17 @@ curr = genGroupByPlanMapAggr2MR(dest, qb, curr); } } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) { - curr = genGroupByPlan2MR(dest, qb, curr); - } else { - curr = genGroupByPlan1MR(dest, qb, curr); + if (isGroupByWithRollup(qb.getParseInfo(), dest) && !qb.getParseInfo().getDistinctFuncExprsForClause(dest).isEmpty()) { + curr = genGroupByPlanMapAggr3MRWithRollup(dest, qb, curr); + } else { + curr = genGroupByPlan2MR(dest, qb, curr); + } + } else{ + if (isGroupByWithRollup(qb.getParseInfo(), dest)) { + curr = genGroupByPlan2MRWithRollup(dest, qb, curr); + } else { + curr = genGroupByPlan1MR(dest, qb, curr); + } } }