Index: conf/hive-default.xml
===================================================================
--- conf/hive-default.xml (revision 1209310)
+++ conf/hive-default.xml (working copy)
@@ -1186,4 +1186,10 @@
"insert directory '/HIVEFT25686/chinna/' from table"
+
+ hive.multigroupby.singlereducer
+ false
+ If a group of subqueries has the same group by and distinct keys, a common filter will be applied in the mapper, and the group by operators will be executed in the same reduce phase after their respective filters. If hive.groupby.skewindata is true, this variable will be ignored. This optimization ignores the variable hive.map.aggr.
+
+
Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1209310)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy)
@@ -300,6 +300,7 @@
HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9),
HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5),
HIVEMULTIGROUPBYSINGLEMR("hive.multigroupby.singlemr", false),
+ HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", false),
// for hive udtf operator
HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false),
Index: ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out (revision 0)
+++ ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out (revision 0)
@@ -0,0 +1,801 @@
+PREHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@DEST1
+PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@DEST2
+PREHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL SRC) key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (. (TOK_TABLE_OR_COL SRC) value) 5)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL SRC) key))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-2
+ Stage-4 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Reduce Output Operator
+ key expressions:
+ expr: key
+ type: string
+ sort order: +
+ Map-reduce partition columns:
+ expr: key
+ type: string
+ tag: -1
+ value expressions:
+ expr: substr(value, 5)
+ type: string
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: double
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: true
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+ Group By Operator
+ aggregations:
+ expr: sum(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: double
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: UDFToInteger(_col0)
+ type: int
+ expr: _col1
+ type: double
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: true
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT DEST1.* FROM DEST1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-29-17_615_6229510078837099937/-mr-10000
+POSTHOOK: query: SELECT DEST1.* FROM DEST1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-29-17_615_6229510078837099937/-mr-10000
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+133 133.0
+134 268.0
+136 136.0
+137 274.0
+138 552.0
+143 143.0
+145 145.0
+146 292.0
+149 298.0
+15 30.0
+150 150.0
+152 304.0
+153 153.0
+155 155.0
+156 156.0
+157 157.0
+158 158.0
+160 160.0
+162 162.0
+163 163.0
+164 328.0
+165 330.0
+166 166.0
+167 501.0
+168 168.0
+169 676.0
+17 17.0
+170 170.0
+172 344.0
+174 348.0
+175 350.0
+176 352.0
+177 177.0
+178 178.0
+179 358.0
+18 36.0
+180 180.0
+181 181.0
+183 183.0
+186 186.0
+187 561.0
+189 189.0
+19 19.0
+190 190.0
+191 382.0
+192 192.0
+193 579.0
+194 194.0
+195 390.0
+196 196.0
+197 394.0
+199 597.0
+2 2.0
+20 20.0
+200 400.0
+201 201.0
+202 202.0
+203 406.0
+205 410.0
+207 414.0
+208 624.0
+209 418.0
+213 426.0
+214 214.0
+216 432.0
+217 434.0
+218 218.0
+219 438.0
+221 442.0
+222 222.0
+223 446.0
+224 448.0
+226 226.0
+228 228.0
+229 458.0
+230 1150.0
+233 466.0
+235 235.0
+237 474.0
+238 476.0
+239 478.0
+24 48.0
+241 241.0
+242 484.0
+244 244.0
+247 247.0
+248 248.0
+249 249.0
+252 252.0
+255 510.0
+256 512.0
+257 257.0
+258 258.0
+26 52.0
+260 260.0
+262 262.0
+263 263.0
+265 530.0
+266 266.0
+27 27.0
+272 544.0
+273 819.0
+274 274.0
+275 275.0
+277 1108.0
+278 556.0
+28 28.0
+280 560.0
+281 562.0
+282 564.0
+283 283.0
+284 284.0
+285 285.0
+286 286.0
+287 287.0
+288 576.0
+289 289.0
+291 291.0
+292 292.0
+296 296.0
+298 894.0
+30 30.0
+302 302.0
+305 305.0
+306 306.0
+307 614.0
+308 308.0
+309 618.0
+310 310.0
+311 933.0
+315 315.0
+316 948.0
+317 634.0
+318 954.0
+321 642.0
+322 644.0
+323 323.0
+325 650.0
+327 981.0
+33 33.0
+331 662.0
+332 332.0
+333 666.0
+335 335.0
+336 336.0
+338 338.0
+339 339.0
+34 34.0
+341 341.0
+342 684.0
+344 688.0
+345 345.0
+348 1740.0
+35 105.0
+351 351.0
+353 706.0
+356 356.0
+360 360.0
+362 362.0
+364 364.0
+365 365.0
+366 366.0
+367 734.0
+368 368.0
+369 1107.0
+37 74.0
+373 373.0
+374 374.0
+375 375.0
+377 377.0
+378 378.0
+379 379.0
+382 764.0
+384 1152.0
+386 386.0
+389 389.0
+392 392.0
+393 393.0
+394 394.0
+395 790.0
+396 1188.0
+397 794.0
+399 798.0
+4 4.0
+400 400.0
+401 2005.0
+402 402.0
+403 1209.0
+404 808.0
+406 1624.0
+407 407.0
+409 1227.0
+41 41.0
+411 411.0
+413 826.0
+414 828.0
+417 1251.0
+418 418.0
+419 419.0
+42 84.0
+421 421.0
+424 848.0
+427 427.0
+429 858.0
+43 43.0
+430 1290.0
+431 1293.0
+432 432.0
+435 435.0
+436 436.0
+437 437.0
+438 1314.0
+439 878.0
+44 44.0
+443 443.0
+444 444.0
+446 446.0
+448 448.0
+449 449.0
+452 452.0
+453 453.0
+454 1362.0
+455 455.0
+457 457.0
+458 916.0
+459 918.0
+460 460.0
+462 924.0
+463 926.0
+466 1398.0
+467 467.0
+468 1872.0
+469 2345.0
+47 47.0
+470 470.0
+472 472.0
+475 475.0
+477 477.0
+478 956.0
+479 479.0
+480 1440.0
+481 481.0
+482 482.0
+483 483.0
+484 484.0
+485 485.0
+487 487.0
+489 1956.0
+490 490.0
+491 491.0
+492 984.0
+493 493.0
+494 494.0
+495 495.0
+496 496.0
+497 497.0
+498 1494.0
+5 15.0
+51 102.0
+53 53.0
+54 54.0
+57 57.0
+58 116.0
+64 64.0
+65 65.0
+66 66.0
+67 134.0
+69 69.0
+70 210.0
+72 144.0
+74 74.0
+76 152.0
+77 77.0
+78 78.0
+8 8.0
+80 80.0
+82 82.0
+83 166.0
+84 168.0
+85 85.0
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
+PREHOOK: query: SELECT DEST2.* FROM DEST2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-29-17_830_9068841342528781692/-mr-10000
+POSTHOOK: query: SELECT DEST2.* FROM DEST2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-29-17_830_9068841342528781692/-mr-10000
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+133 133.0
+134 268.0
+136 136.0
+137 274.0
+138 552.0
+143 143.0
+145 145.0
+146 292.0
+149 298.0
+15 30.0
+150 150.0
+152 304.0
+153 153.0
+155 155.0
+156 156.0
+157 157.0
+158 158.0
+160 160.0
+162 162.0
+163 163.0
+164 328.0
+165 330.0
+166 166.0
+167 501.0
+168 168.0
+169 676.0
+17 17.0
+170 170.0
+172 344.0
+174 348.0
+175 350.0
+176 352.0
+177 177.0
+178 178.0
+179 358.0
+18 36.0
+180 180.0
+181 181.0
+183 183.0
+186 186.0
+187 561.0
+189 189.0
+19 19.0
+190 190.0
+191 382.0
+192 192.0
+193 579.0
+194 194.0
+195 390.0
+196 196.0
+197 394.0
+199 597.0
+2 2.0
+20 20.0
+200 400.0
+201 201.0
+202 202.0
+203 406.0
+205 410.0
+207 414.0
+208 624.0
+209 418.0
+213 426.0
+214 214.0
+216 432.0
+217 434.0
+218 218.0
+219 438.0
+221 442.0
+222 222.0
+223 446.0
+224 448.0
+226 226.0
+228 228.0
+229 458.0
+230 1150.0
+233 466.0
+235 235.0
+237 474.0
+238 476.0
+239 478.0
+24 48.0
+241 241.0
+242 484.0
+244 244.0
+247 247.0
+248 248.0
+249 249.0
+252 252.0
+255 510.0
+256 512.0
+257 257.0
+258 258.0
+26 52.0
+260 260.0
+262 262.0
+263 263.0
+265 530.0
+266 266.0
+27 27.0
+272 544.0
+273 819.0
+274 274.0
+275 275.0
+277 1108.0
+278 556.0
+28 28.0
+280 560.0
+281 562.0
+282 564.0
+283 283.0
+284 284.0
+285 285.0
+286 286.0
+287 287.0
+288 576.0
+289 289.0
+291 291.0
+292 292.0
+296 296.0
+298 894.0
+30 30.0
+302 302.0
+305 305.0
+306 306.0
+307 614.0
+308 308.0
+309 618.0
+310 310.0
+311 933.0
+315 315.0
+316 948.0
+317 634.0
+318 954.0
+321 642.0
+322 644.0
+323 323.0
+325 650.0
+327 981.0
+33 33.0
+331 662.0
+332 332.0
+333 666.0
+335 335.0
+336 336.0
+338 338.0
+339 339.0
+34 34.0
+341 341.0
+342 684.0
+344 688.0
+345 345.0
+348 1740.0
+35 105.0
+351 351.0
+353 706.0
+356 356.0
+360 360.0
+362 362.0
+364 364.0
+365 365.0
+366 366.0
+367 734.0
+368 368.0
+369 1107.0
+37 74.0
+373 373.0
+374 374.0
+375 375.0
+377 377.0
+378 378.0
+379 379.0
+382 764.0
+384 1152.0
+386 386.0
+389 389.0
+392 392.0
+393 393.0
+394 394.0
+395 790.0
+396 1188.0
+397 794.0
+399 798.0
+4 4.0
+400 400.0
+401 2005.0
+402 402.0
+403 1209.0
+404 808.0
+406 1624.0
+407 407.0
+409 1227.0
+41 41.0
+411 411.0
+413 826.0
+414 828.0
+417 1251.0
+418 418.0
+419 419.0
+42 84.0
+421 421.0
+424 848.0
+427 427.0
+429 858.0
+43 43.0
+430 1290.0
+431 1293.0
+432 432.0
+435 435.0
+436 436.0
+437 437.0
+438 1314.0
+439 878.0
+44 44.0
+443 443.0
+444 444.0
+446 446.0
+448 448.0
+449 449.0
+452 452.0
+453 453.0
+454 1362.0
+455 455.0
+457 457.0
+458 916.0
+459 918.0
+460 460.0
+462 924.0
+463 926.0
+466 1398.0
+467 467.0
+468 1872.0
+469 2345.0
+47 47.0
+470 470.0
+472 472.0
+475 475.0
+477 477.0
+478 956.0
+479 479.0
+480 1440.0
+481 481.0
+482 482.0
+483 483.0
+484 484.0
+485 485.0
+487 487.0
+489 1956.0
+490 490.0
+491 491.0
+492 984.0
+493 493.0
+494 494.0
+495 495.0
+496 496.0
+497 497.0
+498 1494.0
+5 15.0
+51 102.0
+53 53.0
+54 54.0
+57 57.0
+58 116.0
+64 64.0
+65 65.0
+66 66.0
+67 134.0
+69 69.0
+70 210.0
+72 144.0
+74 74.0
+76 152.0
+77 77.0
+78 78.0
+8 8.0
+80 80.0
+82 82.0
+83 166.0
+84 168.0
+85 85.0
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
Index: ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (revision 0)
+++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out (revision 0)
@@ -0,0 +1,1490 @@
+PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g3
+PREHOOK: query: CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_g4
+PREHOOK: query: CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_h2
+PREHOOK: query: CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@dest_h3
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g4))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))
+
+STAGE DEPENDENCIES:
+ Stage-3 is a root stage
+ Stage-0 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-3
+ Stage-5 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-3
+ Stage-6 depends on stages: Stage-2
+
+STAGE PLANS:
+ Stage: Stage-3
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: substr(key, 1, 1)
+ type: string
+ expr: substr(value, 5)
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: substr(key, 1, 1)
+ type: string
+ tag: -1
+ value expressions:
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Forward
+ Filter Operator
+ predicate:
+ expr: (KEY._col0 >= 5)
+ type: boolean
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:1._col0)
+ expr: sum(KEY._col1:1._col0)
+ expr: sum(DISTINCT KEY._col1:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: concat(_col0, _col2)
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g2
+ Filter Operator
+ predicate:
+ expr: (KEY._col0 < 5)
+ type: boolean
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:1._col0)
+ expr: sum(KEY._col1:1._col0)
+ expr: sum(DISTINCT KEY._col1:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: concat(_col0, _col2)
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g3
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:1._col0)
+ expr: sum(KEY._col1:1._col0)
+ expr: sum(DISTINCT KEY._col1:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: concat(_col0, _col2)
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 3
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g2
+
+ Stage: Stage-4
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g3
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
+
+ Stage: Stage-6
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+PREHOOK: Output: default@dest_g3
+PREHOOK: Output: default@dest_g4
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Output: default@dest_g4
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT * FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-37_678_1190766748759708450/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-37_678_1190766748759708450/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: SELECT * FROM dest_g3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g3
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-37_934_2856960859241427923/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_g3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g3
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-37_934_2856960859241427923/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+PREHOOK: query: SELECT * FROM dest_g4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g4
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-38_089_5201047574673434199/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_g4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g4
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-38_089_5201047574673434199/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g4))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. 
(TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_h2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 2 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_h3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 2 1))))
+
+STAGE DEPENDENCIES:
+ Stage-5 is a root stage
+ Stage-0 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-0
+ Stage-1 depends on stages: Stage-5
+ Stage-7 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-2
+ Stage-9 depends on stages: Stage-5
+ Stage-3 depends on stages: Stage-9
+ Stage-10 depends on stages: Stage-3
+ Stage-4 depends on stages: Stage-9
+ Stage-11 depends on stages: Stage-4
+
+STAGE PLANS:
+ Stage: Stage-5
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Reduce Output Operator
+ key expressions:
+ expr: substr(key, 1, 1)
+ type: string
+ expr: substr(value, 5)
+ type: string
+ sort order: ++
+ Map-reduce partition columns:
+ expr: substr(key, 1, 1)
+ type: string
+ tag: -1
+ value expressions:
+ expr: value
+ type: string
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Operator Tree:
+ Forward
+ Filter Operator
+ predicate:
+ expr: (KEY._col0 >= 5)
+ type: boolean
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:1._col0)
+ expr: sum(KEY._col1:1._col0)
+ expr: sum(DISTINCT KEY._col1:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: concat(_col0, _col2)
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g2
+ Filter Operator
+ predicate:
+ expr: (KEY._col0 < 5)
+ type: boolean
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:1._col0)
+ expr: sum(KEY._col1:1._col0)
+ expr: sum(DISTINCT KEY._col1:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: concat(_col0, _col2)
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g3
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col1:1._col0)
+ expr: sum(KEY._col1:1._col0)
+ expr: sum(DISTINCT KEY._col1:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col1
+ type: bigint
+ expr: concat(_col0, _col2)
+ type: string
+ expr: _col3
+ type: double
+ expr: _col4
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 3
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g2
+
+ Stage: Stage-6
+ Stats-Aggr Operator
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g3
+
+ Stage: Stage-7
+ Stats-Aggr Operator
+
+ Stage: Stage-2
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_g4
+
+ Stage: Stage-8
+ Stats-Aggr Operator
+
+ Stage: Stage-9
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-38_258_1010654852771769899/-mr-10010
+ Reduce Output Operator
+ key expressions:
+ expr: substr(key, 1, 1)
+ type: string
+ expr: substr(key, 2, 1)
+ type: string
+ expr: substr(value, 5)
+ type: string
+ sort order: +++
+ Map-reduce partition columns:
+ expr: substr(key, 1, 1)
+ type: string
+ expr: substr(key, 2, 1)
+ type: string
+ tag: -1
+ value expressions:
+ expr: value
+ type: string
+ Reduce Operator Tree:
+ Forward
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:1._col0)
+ expr: sum(KEY._col2:1._col0)
+ expr: sum(DISTINCT KEY._col2:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ expr: concat(_col0, _col3)
+ type: string
+ expr: _col4
+ type: double
+ expr: _col5
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 4
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_h2
+ Filter Operator
+ predicate:
+ expr: (KEY._col0 >= 5)
+ type: boolean
+ Group By Operator
+ aggregations:
+ expr: count(DISTINCT KEY._col2:1._col0)
+ expr: sum(KEY._col2:1._col0)
+ expr: sum(DISTINCT KEY._col2:1._col0)
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: string
+ expr: KEY._col1
+ type: string
+ mode: complete
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: _col2
+ type: bigint
+ expr: concat(_col0, _col3)
+ type: string
+ expr: _col4
+ type: double
+ expr: _col5
+ type: bigint
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Select Operator
+ expressions:
+ expr: _col0
+ type: string
+ expr: UDFToInteger(_col1)
+ type: int
+ expr: _col2
+ type: string
+ expr: UDFToInteger(_col3)
+ type: int
+ expr: UDFToInteger(_col4)
+ type: int
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ File Output Operator
+ compressed: false
+ GlobalTableId: 5
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_h3
+
+ Stage: Stage-3
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_h2
+
+ Stage: Stage-10
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest_h3
+
+ Stage: Stage-11
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+PREHOOK: Output: default@dest_g3
+PREHOOK: Output: default@dest_g4
+PREHOOK: Output: default@dest_h2
+PREHOOK: Output: default@dest_h3
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Output: default@dest_g4
+POSTHOOK: Output: default@dest_h2
+POSTHOOK: Output: default@dest_h3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT * FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_294_1322443789908760601/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_294_1322443789908760601/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: SELECT * FROM dest_g3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g3
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_465_4774833074525358195/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_g3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g3
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_465_4774833074525358195/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+PREHOOK: query: SELECT * FROM dest_g4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g4
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_643_7911266339318391566/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_g4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g4
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_643_7911266339318391566/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: SELECT * FROM dest_h2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_h2
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_815_8736720864393713198/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_h2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_h2
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_815_8736720864393713198/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+0 1 00.0 0 3
+1 5 1729.0 422 8
+1 7 11171.0 702 11
+1 6 11282.0 640 12
+1 6 11494.0 809 11
+1 4 1878.0 583 6
+1 8 11263.0 1096 10
+1 9 12654.0 1484 16
+1 9 12294.0 1418 14
+1 7 11516.0 1124 10
+1 10 13133.0 1766 17
+2 1 22.0 2 1
+2 9 23095.0 1655 16
+2 6 22162.0 1297 10
+2 7 22470.0 1573 11
+2 6 23279.0 1412 14
+2 7 21761.0 1495 9
+2 5 21789.0 1278 7
+2 6 21633.0 1342 8
+2 7 23603.0 1676 14
+2 11 24004.0 2873 15
+2 4 21773.0 1177 6
+3 7 32483.0 1867 9
+3 6 34094.0 1887 13
+3 5 33240.0 1618 10
+3 8 33041.0 2377 10
+3 6 33832.0 1754 12
+3 4 31518.0 1095 7
+3 8 34026.0 2921 11
+3 7 32330.0 2293 8
+3 4 32691.0 1541 7
+3 7 34749.0 2766 12
+4 1 44.0 4 1
+4 8 48082.0 3232 20
+4 7 44194.0 2533 11
+4 5 42638.0 1743 8
+4 9 46558.0 3521 16
+4 6 42274.0 2274 6
+4 7 45013.0 3188 11
+4 7 48392.0 3255 18
+4 7 43376.0 2898 8
+4 8 46298.0 3871 13
+4 9 45934.0 4446 12
+5 1 515.0 5 3
+5 1 5102.0 51 2
+5 1 553.0 53 1
+5 1 554.0 54 1
+5 1 557.0 57 1
+5 1 5116.0 58 2
+6 1 664.0 64 1
+6 1 665.0 65 1
+6 1 666.0 66 1
+6 1 6134.0 67 2
+6 1 669.0 69 1
+7 1 7210.0 70 3
+7 1 7144.0 72 2
+7 1 774.0 74 1
+7 1 7152.0 76 2
+7 1 777.0 77 1
+7 1 778.0 78 1
+8 1 88.0 8 1
+8 1 880.0 80 1
+8 1 882.0 82 1
+8 1 8166.0 83 2
+8 1 8168.0 84 2
+8 1 885.0 85 1
+8 1 886.0 86 1
+8 1 887.0 87 1
+9 1 99.0 9 1
+9 1 9270.0 90 3
+9 1 992.0 92 1
+9 1 9190.0 95 2
+9 1 996.0 96 1
+9 1 9194.0 97 2
+9 1 9196.0 98 2
+PREHOOK: query: SELECT * FROM dest_h3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_h3
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_964_6785669777089425431/-mr-10000
+POSTHOOK: query: SELECT * FROM dest_h3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_h3
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-12-01_16-41-57_964_6785669777089425431/-mr-10000
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+5 1 515.0 5 3
+5 1 5102.0 51 2
+5 1 553.0 53 1
+5 1 554.0 54 1
+5 1 557.0 57 1
+5 1 5116.0 58 2
+6 1 664.0 64 1
+6 1 665.0 65 1
+6 1 666.0 66 1
+6 1 6134.0 67 2
+6 1 669.0 69 1
+7 1 7210.0 70 3
+7 1 7144.0 72 2
+7 1 774.0 74 1
+7 1 7152.0 76 2
+7 1 777.0 77 1
+7 1 778.0 78 1
+8 1 88.0 8 1
+8 1 880.0 80 1
+8 1 882.0 82 1
+8 1 8166.0 83 2
+8 1 8168.0 84 2
+8 1 885.0 85 1
+8 1 886.0 86 1
+8 1 887.0 87 1
+9 1 99.0 9 1
+9 1 9270.0 90 3
+9 1 992.0 92 1
+9 1 9190.0 95 2
+9 1 996.0 96 1
+9 1 9194.0 97 2
+9 1 9196.0 98 2
+PREHOOK: query: DROP TABLE dest_g2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g2
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: DROP TABLE dest_g2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g2
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE dest_g3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g3
+PREHOOK: Output: default@dest_g3
+POSTHOOK: query: DROP TABLE dest_g3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g3
+POSTHOOK: Output: default@dest_g3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE dest_g4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g4
+PREHOOK: Output: default@dest_g4
+POSTHOOK: query: DROP TABLE dest_g4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g4
+POSTHOOK: Output: default@dest_g4
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE dest_h2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_h2
+PREHOOK: Output: default@dest_h2
+POSTHOOK: query: DROP TABLE dest_h2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_h2
+POSTHOOK: Output: default@dest_h2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP TABLE dest_h3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_h3
+PREHOOK: Output: default@dest_h3
+POSTHOOK: query: DROP TABLE dest_h3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_h3
+POSTHOOK: Output: default@dest_h3
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g3.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g4.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h2.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_h3.c4 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest_h3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
Index: ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out
===================================================================
--- ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out (revision 0)
+++ ql/src/test/results/clientpositive/groupby_complex_types_multi_single_reducer.q.out (revision 0)
@@ -0,0 +1,841 @@
+PREHOOK: query: CREATE TABLE DEST1(key ARRAY, value BIGINT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE DEST1(key ARRAY, value BIGINT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@DEST1
+PREHOOK: query: CREATE TABLE DEST2(key MAP, value BIGINT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE DEST2(key MAP, value BIGINT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@DEST2
+PREHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value)
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+ (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME SRC))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION ARRAY (. (TOK_TABLE_OR_COL SRC) key))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION ARRAY (. (TOK_TABLE_OR_COL SRC) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION MAP (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value))) (TOK_SELEXPR (TOK_FUNCTION COUNT 1))) (TOK_GROUPBY (TOK_FUNCTION MAP (. (TOK_TABLE_OR_COL SRC) key) (. (TOK_TABLE_OR_COL SRC) value)))))
+
+STAGE DEPENDENCIES:
+ Stage-2 is a root stage
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-4
+ Stage-5 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-2
+ Map Reduce
+ Alias -> Map Operator Tree:
+ src
+ TableScan
+ alias: src
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ outputColumnNames: key
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: array(key)
+ type: array
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: array
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: array
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Select Operator
+ expressions:
+ expr: key
+ type: string
+ expr: value
+ type: string
+ outputColumnNames: key, value
+ Group By Operator
+ aggregations:
+ expr: count(1)
+ bucketGroup: false
+ keys:
+ expr: map(key:value)
+ type: map
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: array
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: array
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 1
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest1
+
+ Stage: Stage-3
+ Stats-Aggr Operator
+
+ Stage: Stage-4
+ Map Reduce
+ Alias -> Map Operator Tree:
+ file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-15-39_099_7084897200004262343/-mr-10004
+ Reduce Output Operator
+ key expressions:
+ expr: _col0
+ type: map
+ sort order: +
+ Map-reduce partition columns:
+ expr: _col0
+ type: map
+ tag: -1
+ value expressions:
+ expr: _col1
+ type: bigint
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations:
+ expr: count(VALUE._col0)
+ bucketGroup: false
+ keys:
+ expr: KEY._col0
+ type: map
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Select Operator
+ expressions:
+ expr: _col0
+ type: map
+ expr: _col1
+ type: bigint
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ GlobalTableId: 2
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-1
+ Move Operator
+ tables:
+ replace: true
+ table:
+ input format: org.apache.hadoop.mapred.TextInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ name: default.dest2
+
+ Stage: Stage-5
+ Stats-Aggr Operator
+
+
+PREHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+PREHOOK: Output: default@dest2
+POSTHOOK: query: FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Output: default@dest2
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ]
+PREHOOK: query: SELECT DEST1.* FROM DEST1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-15-58_380_3646098109511713592/-mr-10000
+POSTHOOK: query: SELECT DEST1.* FROM DEST1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-15-58_380_3646098109511713592/-mr-10000
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ]
+["0"] 3
+["10"] 1
+["100"] 2
+["103"] 2
+["104"] 2
+["105"] 1
+["11"] 1
+["111"] 1
+["113"] 2
+["114"] 1
+["116"] 1
+["118"] 2
+["119"] 3
+["12"] 2
+["120"] 2
+["125"] 2
+["126"] 1
+["128"] 3
+["129"] 2
+["131"] 1
+["133"] 1
+["134"] 2
+["136"] 1
+["137"] 2
+["138"] 4
+["143"] 1
+["145"] 1
+["146"] 2
+["149"] 2
+["15"] 2
+["150"] 1
+["152"] 2
+["153"] 1
+["155"] 1
+["156"] 1
+["157"] 1
+["158"] 1
+["160"] 1
+["162"] 1
+["163"] 1
+["164"] 2
+["165"] 2
+["166"] 1
+["167"] 3
+["168"] 1
+["169"] 4
+["17"] 1
+["170"] 1
+["172"] 2
+["174"] 2
+["175"] 2
+["176"] 2
+["177"] 1
+["178"] 1
+["179"] 2
+["18"] 2
+["180"] 1
+["181"] 1
+["183"] 1
+["186"] 1
+["187"] 3
+["189"] 1
+["19"] 1
+["190"] 1
+["191"] 2
+["192"] 1
+["193"] 3
+["194"] 1
+["195"] 2
+["196"] 1
+["197"] 2
+["199"] 3
+["2"] 1
+["20"] 1
+["200"] 2
+["201"] 1
+["202"] 1
+["203"] 2
+["205"] 2
+["207"] 2
+["208"] 3
+["209"] 2
+["213"] 2
+["214"] 1
+["216"] 2
+["217"] 2
+["218"] 1
+["219"] 2
+["221"] 2
+["222"] 1
+["223"] 2
+["224"] 2
+["226"] 1
+["228"] 1
+["229"] 2
+["230"] 5
+["233"] 2
+["235"] 1
+["237"] 2
+["238"] 2
+["239"] 2
+["24"] 2
+["241"] 1
+["242"] 2
+["244"] 1
+["247"] 1
+["248"] 1
+["249"] 1
+["252"] 1
+["255"] 2
+["256"] 2
+["257"] 1
+["258"] 1
+["26"] 2
+["260"] 1
+["262"] 1
+["263"] 1
+["265"] 2
+["266"] 1
+["27"] 1
+["272"] 2
+["273"] 3
+["274"] 1
+["275"] 1
+["277"] 4
+["278"] 2
+["28"] 1
+["280"] 2
+["281"] 2
+["282"] 2
+["283"] 1
+["284"] 1
+["285"] 1
+["286"] 1
+["287"] 1
+["288"] 2
+["289"] 1
+["291"] 1
+["292"] 1
+["296"] 1
+["298"] 3
+["30"] 1
+["302"] 1
+["305"] 1
+["306"] 1
+["307"] 2
+["308"] 1
+["309"] 2
+["310"] 1
+["311"] 3
+["315"] 1
+["316"] 3
+["317"] 2
+["318"] 3
+["321"] 2
+["322"] 2
+["323"] 1
+["325"] 2
+["327"] 3
+["33"] 1
+["331"] 2
+["332"] 1
+["333"] 2
+["335"] 1
+["336"] 1
+["338"] 1
+["339"] 1
+["34"] 1
+["341"] 1
+["342"] 2
+["344"] 2
+["345"] 1
+["348"] 5
+["35"] 3
+["351"] 1
+["353"] 2
+["356"] 1
+["360"] 1
+["362"] 1
+["364"] 1
+["365"] 1
+["366"] 1
+["367"] 2
+["368"] 1
+["369"] 3
+["37"] 2
+["373"] 1
+["374"] 1
+["375"] 1
+["377"] 1
+["378"] 1
+["379"] 1
+["382"] 2
+["384"] 3
+["386"] 1
+["389"] 1
+["392"] 1
+["393"] 1
+["394"] 1
+["395"] 2
+["396"] 3
+["397"] 2
+["399"] 2
+["4"] 1
+["400"] 1
+["401"] 5
+["402"] 1
+["403"] 3
+["404"] 2
+["406"] 4
+["407"] 1
+["409"] 3
+["41"] 1
+["411"] 1
+["413"] 2
+["414"] 2
+["417"] 3
+["418"] 1
+["419"] 1
+["42"] 2
+["421"] 1
+["424"] 2
+["427"] 1
+["429"] 2
+["43"] 1
+["430"] 3
+["431"] 3
+["432"] 1
+["435"] 1
+["436"] 1
+["437"] 1
+["438"] 3
+["439"] 2
+["44"] 1
+["443"] 1
+["444"] 1
+["446"] 1
+["448"] 1
+["449"] 1
+["452"] 1
+["453"] 1
+["454"] 3
+["455"] 1
+["457"] 1
+["458"] 2
+["459"] 2
+["460"] 1
+["462"] 2
+["463"] 2
+["466"] 3
+["467"] 1
+["468"] 4
+["469"] 5
+["47"] 1
+["470"] 1
+["472"] 1
+["475"] 1
+["477"] 1
+["478"] 2
+["479"] 1
+["480"] 3
+["481"] 1
+["482"] 1
+["483"] 1
+["484"] 1
+["485"] 1
+["487"] 1
+["489"] 4
+["490"] 1
+["491"] 1
+["492"] 2
+["493"] 1
+["494"] 1
+["495"] 1
+["496"] 1
+["497"] 1
+["498"] 3
+["5"] 3
+["51"] 2
+["53"] 1
+["54"] 1
+["57"] 1
+["58"] 2
+["64"] 1
+["65"] 1
+["66"] 1
+["67"] 2
+["69"] 1
+["70"] 3
+["72"] 2
+["74"] 1
+["76"] 2
+["77"] 1
+["78"] 1
+["8"] 1
+["80"] 1
+["82"] 1
+["83"] 2
+["84"] 2
+["85"] 1
+["86"] 1
+["87"] 1
+["9"] 1
+["90"] 3
+["92"] 1
+["95"] 2
+["96"] 1
+["97"] 2
+["98"] 2
+PREHOOK: query: SELECT DEST2.* FROM DEST2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest2
+PREHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-15-58_574_8004447145043580956/-mr-10000
+POSTHOOK: query: SELECT DEST2.* FROM DEST2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest2
+POSTHOOK: Output: file:/var/folders/Y1/Y1Kf7th8FAawW1lYb6Tt+l+pemQ/-Tmp-/kevinwilfong/hive_2011-11-16_14-15-58_574_8004447145043580956/-mr-10000
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ]
+{"0":"val_0"} 3
+{"10":"val_10"} 1
+{"100":"val_100"} 2
+{"103":"val_103"} 2
+{"104":"val_104"} 2
+{"105":"val_105"} 1
+{"11":"val_11"} 1
+{"111":"val_111"} 1
+{"113":"val_113"} 2
+{"114":"val_114"} 1
+{"116":"val_116"} 1
+{"118":"val_118"} 2
+{"119":"val_119"} 3
+{"12":"val_12"} 2
+{"120":"val_120"} 2
+{"125":"val_125"} 2
+{"126":"val_126"} 1
+{"128":"val_128"} 3
+{"129":"val_129"} 2
+{"131":"val_131"} 1
+{"133":"val_133"} 1
+{"134":"val_134"} 2
+{"136":"val_136"} 1
+{"137":"val_137"} 2
+{"138":"val_138"} 4
+{"143":"val_143"} 1
+{"145":"val_145"} 1
+{"146":"val_146"} 2
+{"149":"val_149"} 2
+{"15":"val_15"} 2
+{"150":"val_150"} 1
+{"152":"val_152"} 2
+{"153":"val_153"} 1
+{"155":"val_155"} 1
+{"156":"val_156"} 1
+{"157":"val_157"} 1
+{"158":"val_158"} 1
+{"160":"val_160"} 1
+{"162":"val_162"} 1
+{"163":"val_163"} 1
+{"164":"val_164"} 2
+{"165":"val_165"} 2
+{"166":"val_166"} 1
+{"167":"val_167"} 3
+{"168":"val_168"} 1
+{"169":"val_169"} 4
+{"17":"val_17"} 1
+{"170":"val_170"} 1
+{"172":"val_172"} 2
+{"174":"val_174"} 2
+{"175":"val_175"} 2
+{"176":"val_176"} 2
+{"177":"val_177"} 1
+{"178":"val_178"} 1
+{"179":"val_179"} 2
+{"18":"val_18"} 2
+{"180":"val_180"} 1
+{"181":"val_181"} 1
+{"183":"val_183"} 1
+{"186":"val_186"} 1
+{"187":"val_187"} 3
+{"189":"val_189"} 1
+{"19":"val_19"} 1
+{"190":"val_190"} 1
+{"191":"val_191"} 2
+{"192":"val_192"} 1
+{"193":"val_193"} 3
+{"194":"val_194"} 1
+{"195":"val_195"} 2
+{"196":"val_196"} 1
+{"197":"val_197"} 2
+{"199":"val_199"} 3
+{"2":"val_2"} 1
+{"20":"val_20"} 1
+{"200":"val_200"} 2
+{"201":"val_201"} 1
+{"202":"val_202"} 1
+{"203":"val_203"} 2
+{"205":"val_205"} 2
+{"207":"val_207"} 2
+{"208":"val_208"} 3
+{"209":"val_209"} 2
+{"213":"val_213"} 2
+{"214":"val_214"} 1
+{"216":"val_216"} 2
+{"217":"val_217"} 2
+{"218":"val_218"} 1
+{"219":"val_219"} 2
+{"221":"val_221"} 2
+{"222":"val_222"} 1
+{"223":"val_223"} 2
+{"224":"val_224"} 2
+{"226":"val_226"} 1
+{"228":"val_228"} 1
+{"229":"val_229"} 2
+{"230":"val_230"} 5
+{"233":"val_233"} 2
+{"235":"val_235"} 1
+{"237":"val_237"} 2
+{"238":"val_238"} 2
+{"239":"val_239"} 2
+{"24":"val_24"} 2
+{"241":"val_241"} 1
+{"242":"val_242"} 2
+{"244":"val_244"} 1
+{"247":"val_247"} 1
+{"248":"val_248"} 1
+{"249":"val_249"} 1
+{"252":"val_252"} 1
+{"255":"val_255"} 2
+{"256":"val_256"} 2
+{"257":"val_257"} 1
+{"258":"val_258"} 1
+{"26":"val_26"} 2
+{"260":"val_260"} 1
+{"262":"val_262"} 1
+{"263":"val_263"} 1
+{"265":"val_265"} 2
+{"266":"val_266"} 1
+{"27":"val_27"} 1
+{"272":"val_272"} 2
+{"273":"val_273"} 3
+{"274":"val_274"} 1
+{"275":"val_275"} 1
+{"277":"val_277"} 4
+{"278":"val_278"} 2
+{"28":"val_28"} 1
+{"280":"val_280"} 2
+{"281":"val_281"} 2
+{"282":"val_282"} 2
+{"283":"val_283"} 1
+{"284":"val_284"} 1
+{"285":"val_285"} 1
+{"286":"val_286"} 1
+{"287":"val_287"} 1
+{"288":"val_288"} 2
+{"289":"val_289"} 1
+{"291":"val_291"} 1
+{"292":"val_292"} 1
+{"296":"val_296"} 1
+{"298":"val_298"} 3
+{"30":"val_30"} 1
+{"302":"val_302"} 1
+{"305":"val_305"} 1
+{"306":"val_306"} 1
+{"307":"val_307"} 2
+{"308":"val_308"} 1
+{"309":"val_309"} 2
+{"310":"val_310"} 1
+{"311":"val_311"} 3
+{"315":"val_315"} 1
+{"316":"val_316"} 3
+{"317":"val_317"} 2
+{"318":"val_318"} 3
+{"321":"val_321"} 2
+{"322":"val_322"} 2
+{"323":"val_323"} 1
+{"325":"val_325"} 2
+{"327":"val_327"} 3
+{"33":"val_33"} 1
+{"331":"val_331"} 2
+{"332":"val_332"} 1
+{"333":"val_333"} 2
+{"335":"val_335"} 1
+{"336":"val_336"} 1
+{"338":"val_338"} 1
+{"339":"val_339"} 1
+{"34":"val_34"} 1
+{"341":"val_341"} 1
+{"342":"val_342"} 2
+{"344":"val_344"} 2
+{"345":"val_345"} 1
+{"348":"val_348"} 5
+{"35":"val_35"} 3
+{"351":"val_351"} 1
+{"353":"val_353"} 2
+{"356":"val_356"} 1
+{"360":"val_360"} 1
+{"362":"val_362"} 1
+{"364":"val_364"} 1
+{"365":"val_365"} 1
+{"366":"val_366"} 1
+{"367":"val_367"} 2
+{"368":"val_368"} 1
+{"369":"val_369"} 3
+{"37":"val_37"} 2
+{"373":"val_373"} 1
+{"374":"val_374"} 1
+{"375":"val_375"} 1
+{"377":"val_377"} 1
+{"378":"val_378"} 1
+{"379":"val_379"} 1
+{"382":"val_382"} 2
+{"384":"val_384"} 3
+{"386":"val_386"} 1
+{"389":"val_389"} 1
+{"392":"val_392"} 1
+{"393":"val_393"} 1
+{"394":"val_394"} 1
+{"395":"val_395"} 2
+{"396":"val_396"} 3
+{"397":"val_397"} 2
+{"399":"val_399"} 2
+{"4":"val_4"} 1
+{"400":"val_400"} 1
+{"401":"val_401"} 5
+{"402":"val_402"} 1
+{"403":"val_403"} 3
+{"404":"val_404"} 2
+{"406":"val_406"} 4
+{"407":"val_407"} 1
+{"409":"val_409"} 3
+{"41":"val_41"} 1
+{"411":"val_411"} 1
+{"413":"val_413"} 2
+{"414":"val_414"} 2
+{"417":"val_417"} 3
+{"418":"val_418"} 1
+{"419":"val_419"} 1
+{"42":"val_42"} 2
+{"421":"val_421"} 1
+{"424":"val_424"} 2
+{"427":"val_427"} 1
+{"429":"val_429"} 2
+{"43":"val_43"} 1
+{"430":"val_430"} 3
+{"431":"val_431"} 3
+{"432":"val_432"} 1
+{"435":"val_435"} 1
+{"436":"val_436"} 1
+{"437":"val_437"} 1
+{"438":"val_438"} 3
+{"439":"val_439"} 2
+{"44":"val_44"} 1
+{"443":"val_443"} 1
+{"444":"val_444"} 1
+{"446":"val_446"} 1
+{"448":"val_448"} 1
+{"449":"val_449"} 1
+{"452":"val_452"} 1
+{"453":"val_453"} 1
+{"454":"val_454"} 3
+{"455":"val_455"} 1
+{"457":"val_457"} 1
+{"458":"val_458"} 2
+{"459":"val_459"} 2
+{"460":"val_460"} 1
+{"462":"val_462"} 2
+{"463":"val_463"} 2
+{"466":"val_466"} 3
+{"467":"val_467"} 1
+{"468":"val_468"} 4
+{"469":"val_469"} 5
+{"47":"val_47"} 1
+{"470":"val_470"} 1
+{"472":"val_472"} 1
+{"475":"val_475"} 1
+{"477":"val_477"} 1
+{"478":"val_478"} 2
+{"479":"val_479"} 1
+{"480":"val_480"} 3
+{"481":"val_481"} 1
+{"482":"val_482"} 1
+{"483":"val_483"} 1
+{"484":"val_484"} 1
+{"485":"val_485"} 1
+{"487":"val_487"} 1
+{"489":"val_489"} 4
+{"490":"val_490"} 1
+{"491":"val_491"} 1
+{"492":"val_492"} 2
+{"493":"val_493"} 1
+{"494":"val_494"} 1
+{"495":"val_495"} 1
+{"496":"val_496"} 1
+{"497":"val_497"} 1
+{"498":"val_498"} 3
+{"5":"val_5"} 3
+{"51":"val_51"} 2
+{"53":"val_53"} 1
+{"54":"val_54"} 1
+{"57":"val_57"} 1
+{"58":"val_58"} 2
+{"64":"val_64"} 1
+{"65":"val_65"} 1
+{"66":"val_66"} 1
+{"67":"val_67"} 2
+{"69":"val_69"} 1
+{"70":"val_70"} 3
+{"72":"val_72"} 2
+{"74":"val_74"} 1
+{"76":"val_76"} 2
+{"77":"val_77"} 1
+{"78":"val_78"} 1
+{"8":"val_8"} 1
+{"80":"val_80"} 1
+{"82":"val_82"} 1
+{"83":"val_83"} 2
+{"84":"val_84"} 2
+{"85":"val_85"} 1
+{"86":"val_86"} 1
+{"87":"val_87"} 1
+{"9":"val_9"} 1
+{"90":"val_90"} 3
+{"92":"val_92"} 1
+{"95":"val_95"} 2
+{"96":"val_96"} 1
+{"97":"val_97"} 2
+{"98":"val_98"} 2
Index: ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q
===================================================================
--- ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q (revision 0)
+++ ql/src/test/queries/clientpositive/groupby_multi_single_reducer.q (revision 0)
@@ -0,0 +1,49 @@
+set hive.multigroupby.singlereducer=true;
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1);
+
+SELECT * FROM dest_g2;
+SELECT * FROM dest_g3;
+SELECT * FROM dest_g4;
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);
+
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1)
+INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);
+
+SELECT * FROM dest_g2;
+SELECT * FROM dest_g3;
+SELECT * FROM dest_g4;
+SELECT * FROM dest_h2;
+SELECT * FROM dest_h3;
+
+DROP TABLE dest_g2;
+DROP TABLE dest_g3;
+DROP TABLE dest_g4;
+DROP TABLE dest_h2;
+DROP TABLE dest_h3;
\ No newline at end of file
Index: ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q
===================================================================
--- ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q (revision 0)
+++ ql/src/test/queries/clientpositive/groupby7_noskew_multi_single_reducer.q (revision 0)
@@ -0,0 +1,22 @@
+set hive.map.aggr=false;
+set hive.multigroupby.singlereducer=true;
+set hive.groupby.skewindata=false;
+set mapred.reduce.tasks=31;
+
+CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE;
+CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE;
+
+SET hive.exec.compress.intermediate=true;
+SET hive.exec.compress.output=true;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key;
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key
+INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key;
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
Index: ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q
===================================================================
--- ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q (revision 0)
+++ ql/src/test/queries/clientpositive/groupby_complex_types_multi_single_reducer.q (revision 0)
@@ -0,0 +1,17 @@
+set hive.multigroupby.singlereducer=true;
+
+CREATE TABLE DEST1(key ARRAY, value BIGINT) STORED AS TEXTFILE;
+CREATE TABLE DEST2(key MAP, value BIGINT) STORED AS TEXTFILE;
+
+EXPLAIN
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value);
+
+FROM SRC
+INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key)
+INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value);
+
+SELECT DEST1.* FROM DEST1;
+SELECT DEST2.* FROM DEST2;
+
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java (revision 1209310)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDesc.java (working copy)
@@ -22,9 +22,9 @@
import java.util.List;
import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
/**
* ExprNodeDesc.
@@ -90,4 +90,60 @@
return this.getClass().getName();
}
+  /**
+   * Wraps an ExprNodeDesc so that equals() behaves like isSame() (see the comment on isSame)
+   * instead of falling back to reference identity.
+   *
+   * hashCode() is overridden together with equals(), so two wrappers that compare equal always
+   * hash alike and the wrapper can be stored in hash-based collections.
+   */
+  public static class ExprNodeDescEqualityWrapper {
+    private ExprNodeDesc exprNodeDesc;
+
+    public ExprNodeDescEqualityWrapper(ExprNodeDesc exprNodeDesc) {
+      this.exprNodeDesc = exprNodeDesc;
+    }
+
+    public ExprNodeDesc getExprNodeDesc() {
+      return exprNodeDesc;
+    }
+
+    public void setExprNodeDesc(ExprNodeDesc exprNodeDesc) {
+      this.exprNodeDesc = exprNodeDesc;
+    }
+
+    /**
+     * Compares the wrapped expressions with isSame().
+     *
+     * @param other the object to compare against
+     * @return true if other is an ExprNodeDescEqualityWrapper whose expression isSame() as this
+     *         wrapper's expression, or if both wrappers hold null; false otherwise
+     */
+    @Override
+    public boolean equals(Object other) {
+      // instanceof is false for null, so no separate null check is needed.
+      if (!(other instanceof ExprNodeDescEqualityWrapper)) {
+        return false;
+      }
+
+      ExprNodeDesc otherDesc = ((ExprNodeDescEqualityWrapper) other).getExprNodeDesc();
+      if (exprNodeDesc == null || otherDesc == null) {
+        // An unset expression only equals another unset expression; this also avoids an NPE.
+        return exprNodeDesc == otherDesc;
+      }
+
+      return exprNodeDesc.isSame(otherDesc);
+    }
+
+    /**
+     * Returns a constant on purpose. equals() is defined through isSame(), and nothing in this
+     * class guarantees that ExprNodeDesc.hashCode() agrees with isSame(), so delegating to it
+     * could give equal wrappers different hash codes. A constant always satisfies the
+     * equals/hashCode contract; the cost is that hash-based collections of wrappers degrade to
+     * linear scans.
+     * NOTE(review): switch to a real hash once one consistent with isSame() is available.
+     */
+    @Override
+    public int hashCode() {
+      return 0;
+    }
+  }
}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1209310)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy)
@@ -48,6 +48,7 @@
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.ExecDriver;
@@ -58,7 +59,6 @@
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.JoinOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
-import org.apache.hadoop.hive.ql.exec.ArchiveUtils;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorFactory;
import org.apache.hadoop.hive.ql.exec.RecordReader;
@@ -153,6 +153,7 @@
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.Deserializer;
@@ -2966,7 +2967,172 @@
return rsOp;
}
+  /**
+   * Generate a single ReduceSinkOperator shared by a group of destination clauses.
+   *
+   * The reduce keys are the group by expressions followed by the parameters of the distinct
+   * aggregations, both taken from the first clause in dests. The reduce values are the
+   * parameters of the aggregations of every clause in dests plus every column referenced in
+   * their where clauses, skipping expressions which are already part of the output.
+   *
+   * @param qb
+   *          the query block being compiled
+   * @param dests
+   *          the clause names sharing this reduce sink; must not be empty
+   * @param inputOperatorInfo
+   *          the operator the new reduce sink is made a child of
+   * @return the new ReduceSinkOperator
+   * @throws SemanticException
+   *           if the same group by expression appears twice, or if generating an expression
+   *           fails
+   */
+  @SuppressWarnings("nls")
+  private Operator genCommonGroupByPlanReduceSinkOperator(QB qb, List<String> dests,
+      Operator inputOperatorInfo) throws SemanticException {
+
+    RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo)
+        .getRowResolver();
+    QBParseInfo parseInfo = qb.getParseInfo();
+    RowResolver reduceSinkOutputRowResolver = new RowResolver();
+    reduceSinkOutputRowResolver.setIsExprResolver(true);
+    Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+    ArrayList<ExprNodeDesc> reduceKeys = new ArrayList<ExprNodeDesc>();
+
+    // The group by keys and distinct keys should be the same for all dests, so using the first
+    // one to produce these will be the same as using any other.
+    String dest = dests.get(0);
+
+    // Pre-compute group-by keys and store in reduceKeys
+    List<String> outputKeyColumnNames = new ArrayList<String>();
+    List<String> outputValueColumnNames = new ArrayList<String>();
+    List<ASTNode> grpByExprs = getGroupByForClause(parseInfo, dest);
+    for (int i = 0; i < grpByExprs.size(); ++i) {
+      ASTNode grpbyExpr = grpByExprs.get(i);
+      ExprNodeDesc inputExpr = genExprNodeDesc(grpbyExpr,
+          reduceSinkInputRowResolver);
+      reduceKeys.add(inputExpr);
+      if (reduceSinkOutputRowResolver.getExpression(grpbyExpr) == null) {
+        outputKeyColumnNames.add(getColumnInternalName(reduceKeys.size() - 1));
+        String field = Utilities.ReduceField.KEY.toString() + "."
+            + getColumnInternalName(reduceKeys.size() - 1);
+        ColumnInfo colInfo = new ColumnInfo(field, reduceKeys.get(
+            reduceKeys.size() - 1).getTypeInfo(), null, false);
+        reduceSinkOutputRowResolver.putExpression(grpbyExpr, colInfo);
+        colExprMap.put(colInfo.getInternalName(), inputExpr);
+      } else {
+        throw new SemanticException(ErrorMsg.DUPLICATE_GROUPBY_KEY
+            .getMsg(grpbyExpr));
+      }
+    }
+
+    List<List<Integer>> distinctColIndices = new ArrayList<List<Integer>>();
+    // If there is a distinctFuncExp, add all parameters to the reduceKeys.
+    List<ASTNode> distFuncs = parseInfo.getDistinctFuncExprsForClause(dest);
+    if (!distFuncs.isEmpty()) {
+      String colName = getColumnInternalName(reduceKeys.size());
+      outputKeyColumnNames.add(colName);
+      for (int i = 0; i < distFuncs.size(); i++) {
+        ASTNode value = distFuncs.get(i);
+        int numExprs = 0;
+        List<Integer> distinctIndices = new ArrayList<Integer>();
+        // 0 is function name
+        for (int j = 1; j < value.getChildCount(); j++) {
+          ASTNode parameter = (ASTNode) value.getChild(j);
+          ExprNodeDesc expr = genExprNodeDesc(parameter, reduceSinkInputRowResolver);
+          // see if expr is already present in reduceKeys.
+          // get index of expr in reduceKeys
+          int ri;
+          for (ri = 0; ri < reduceKeys.size(); ri++) {
+            if (reduceKeys.get(ri).getExprString().equals(expr.getExprString())) {
+              break;
+            }
+          }
+          // add the expr to reduceKeys if it is not present
+          if (ri == reduceKeys.size()) {
+            reduceKeys.add(expr);
+          }
+          // add the index of expr in reduceKeys to distinctIndices
+          distinctIndices.add(ri);
+          String name = getColumnInternalName(numExprs);
+          String field = Utilities.ReduceField.KEY.toString() + "." + colName
+              + ":" + i
+              + "." + name;
+          ColumnInfo colInfo = new ColumnInfo(field, expr.getTypeInfo(), null, false);
+          reduceSinkOutputRowResolver.putExpression(parameter, colInfo);
+          numExprs++;
+        }
+        distinctColIndices.add(distinctIndices);
+      }
+    }
+
+    ArrayList<ExprNodeDesc> reduceValues = new ArrayList<ExprNodeDesc>();
+
+    // The dests can have different non-distinct aggregations, so we have to iterate over all of
+    // them
+    for (String destination : dests) {
+      HashMap<String, ASTNode> aggregationTrees = parseInfo
+          .getAggregationExprsForClause(destination);
+
+      // Put parameters to aggregations in reduceValues
+      for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
+        ASTNode value = entry.getValue();
+        // 0 is function name
+        for (int i = 1; i < value.getChildCount(); i++) {
+          ASTNode parameter = (ASTNode) value.getChild(i);
+          // only expressions not already produced as a key or value get a new value column
+          if (reduceSinkOutputRowResolver.getExpression(parameter) == null) {
+            reduceValues.add(genExprNodeDesc(parameter,
+                reduceSinkInputRowResolver));
+            outputValueColumnNames
+                .add(getColumnInternalName(reduceValues.size() - 1));
+            String field = Utilities.ReduceField.VALUE.toString() + "."
+                + getColumnInternalName(reduceValues.size() - 1);
+            reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field,
+                reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null,
+                false));
+          }
+        }
+      }
+
+      // Need to pass all of the columns used in the where clauses as reduce values
+      ASTNode whereClause = parseInfo.getWhrForClause(destination);
+      if (whereClause != null) {
+        List<ASTNode> columnExprs =
+            getColumnExprsFromASTNode(whereClause, reduceSinkInputRowResolver);
+        for (int i = 0; i < columnExprs.size(); i++) {
+          ASTNode parameter = columnExprs.get(i);
+          if (reduceSinkOutputRowResolver.getExpression(parameter) == null) {
+            reduceValues.add(genExprNodeDesc(parameter,
+                reduceSinkInputRowResolver));
+            outputValueColumnNames
+                .add(getColumnInternalName(reduceValues.size() - 1));
+            String field = Utilities.ReduceField.VALUE.toString() + "."
+                + getColumnInternalName(reduceValues.size() - 1);
+            reduceSinkOutputRowResolver.putExpression(parameter, new ColumnInfo(field,
+                reduceValues.get(reduceValues.size() - 1).getTypeInfo(), null,
+                false));
+          }
+        }
+      }
+    }
+
+    ReduceSinkOperator rsOp = (ReduceSinkOperator) putOpInsertMap(
+        OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys,
+        grpByExprs.size(), reduceValues, distinctColIndices,
+        outputKeyColumnNames, outputValueColumnNames, true, -1, grpByExprs.size(),
+        -1), new RowSchema(reduceSinkOutputRowResolver
+        .getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver);
+    rsOp.setColumnExprMap(colExprMap);
+    return rsOp;
+  }
+
+
/**
+   * Given an ASTNode, it returns all of the descendant ASTNodes which represent column
+   * expressions.
+   *
+   * A descendant counts as a column expression when it is a TOK_TABLE_OR_COL node whose first
+   * child, once unescaped, resolves to a column in inputRR without a table alias. Such a node
+   * is collected and not searched any further; every other descendant is searched
+   * recursively. The node itself is never tested, only its descendants.
+   *
+   * @param node
+   *          the root of the subtree to search
+   * @param inputRR
+   *          the row resolver used to decide whether an identifier names a column
+   * @return the matching descendants in depth first order, empty if there are none
+   * @throws SemanticException
+   *           if looking up an identifier in inputRR fails
+   */
+  private List<ASTNode> getColumnExprsFromASTNode(ASTNode node, RowResolver inputRR)
+      throws SemanticException {
+    List<ASTNode> nodes = new ArrayList<ASTNode>();
+    // a leaf simply skips the loop and yields the empty list
+    for (int i = 0; i < node.getChildCount(); i++) {
+      ASTNode child = (ASTNode) node.getChild(i);
+      if (child.getType() == HiveParser.TOK_TABLE_OR_COL && child.getChild(0) != null &&
+          inputRR.get(null,
+          BaseSemanticAnalyzer.unescapeIdentifier(child.getChild(0).getText())) != null) {
+        nodes.add(child);
+      } else {
+        nodes.addAll(getColumnExprsFromASTNode(child, inputRR));
+      }
+    }
+    return nodes;
+  }
+
+ /**
* Generate the second ReduceSinkOperator for the Group By Plan
* (parseInfo.getXXX(dest)). The new ReduceSinkOperator will be a child of
* groupByOperatorInfo.
@@ -3179,6 +3345,85 @@
return groupByOperatorInfo;
}
+  /**
+   * Generate a one map-reduce job plan which evaluates the group bys of several destination
+   * clauses in the same reduce phase.
+   *
+   * The plan built on top of input is: an optional filter on the OR of all the where clauses
+   * (only when every clause has one), a select, one common reduce sink, a forward operator,
+   * and then for each clause its own filter (if it has a where clause), a group by operator in
+   * COMPLETE mode and the rest of the body plan.
+   *
+   * @param dests
+   *          the clause names to be evaluated together
+   * @param qb
+   *          the query block being compiled
+   * @param input
+   *          the operator the plan is built on
+   * @return the last operator generated for the last clause in dests, or the forward operator
+   *         if dests is empty
+   * @throws SemanticException
+   *           if generating any part of the plan fails
+   */
+  @SuppressWarnings({"nls"})
+  private Operator genGroupByPlan1MRMultiReduceGB(List<String> dests, QB qb, Operator input)
+      throws SemanticException {
+
+    QBParseInfo parseInfo = qb.getParseInfo();
+    // the same for every clause, so look it up once
+    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
+
+    ExprNodeDesc previous = null;
+    Operator selectInput = input;
+
+    // In order to facilitate partition pruning, or the where clauses together and put them at the
+    // top of the operator tree, this could also reduce the amount of data going to the reducer
+    for (String dest : dests) {
+      ASTNode whereExpr = parseInfo.getWhrForClause(dest);
+
+      if (whereExpr == null) {
+        // If an expression does not have a where clause, there can be no common filter
+        previous = null;
+        break;
+      }
+
+      ExprNodeDesc current = genExprNodeDesc((ASTNode) whereExpr.getChild(0), inputRR);
+      if (previous == null) {
+        // If this is the first expression
+        previous = current;
+        continue;
+      }
+
+      GenericUDFOPOr or = new GenericUDFOPOr();
+      List<ExprNodeDesc> expressions = new ArrayList<ExprNodeDesc>(2);
+      expressions.add(previous);
+      expressions.add(current);
+      previous = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, or, expressions);
+    }
+
+    if (previous != null) {
+      FilterDesc orFilterDesc = new FilterDesc(previous, false);
+
+      selectInput = putOpInsertMap(OperatorFactory.getAndMakeChild(
+          orFilterDesc, new RowSchema(
+          inputRR.getColumnInfos()), input), inputRR);
+    }
+
+    // insert a select operator here used by the ColumnPruner to reduce
+    // the data to shuffle
+    Operator select = insertSelectAllPlanForGroupBy(selectInput);
+
+    // Generate ReduceSinkOperator
+    Operator reduceSinkOperatorInfo = genCommonGroupByPlanReduceSinkOperator(qb, dests, select);
+
+    // It is assumed throughout the code that a reducer has a single child, add a
+    // ForwardOperator so that we can add multiple filter/group by operators as children
+    RowResolver reduceSinkOperatorInfoRR =
+        opParseCtx.get(reduceSinkOperatorInfo).getRowResolver();
+    Operator forwardOp = putOpInsertMap(OperatorFactory.getAndMakeChild(new ForwardDesc(),
+        new RowSchema(reduceSinkOperatorInfoRR.getColumnInfos()), reduceSinkOperatorInfo),
+        reduceSinkOperatorInfoRR);
+
+    Operator curr = forwardOp;
+
+    for (String dest : dests) {
+      // every clause hangs off the forward operator, not off the previous clause
+      curr = forwardOp;
+
+      if (parseInfo.getWhrForClause(dest) != null) {
+        curr = genFilterPlan(dest, qb, forwardOp);
+      }
+
+      // Generate GroupbyOperator
+      Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo,
+          dest, curr, GroupByDesc.Mode.COMPLETE, null);
+
+      curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb);
+    }
+
+    return curr;
+  }
+
+
static ArrayList getUDAFEvaluators(
ArrayList aggs) {
ArrayList result = new ArrayList();
@@ -5445,7 +5690,7 @@
}
}
- private Operator insertSelectAllPlanForGroupBy(String dest, Operator input)
+ private Operator insertSelectAllPlanForGroupBy(Operator input)
throws SemanticException {
OpParseContext inputCtx = opParseCtx.get(input);
RowResolver inputRR = inputCtx.getRowResolver();
@@ -5627,6 +5872,105 @@
return rsOp;
}
+  /**
+   * Split the clause names from the query block into groups based on common distinct and common
+   * group by expressions, i.e. the spray keys will be the same.
+   *
+   * The spray keys of a clause are the parameters of its distinct aggregations followed by its
+   * group by expressions, without duplicates and compared with isSame semantics. Two clauses
+   * land in the same group when their spray keys are the same set, regardless of order.
+   *
+   * @param qb
+   *          the query block whose clauses are grouped
+   * @param input
+   *          the operator whose row resolver is used to generate the expressions
+   * @return the groups of clause names, never null; a query block with at most one clause yields
+   *         a single group holding that clause, or a single empty group if there is none
+   * @throws SemanticException
+   *           if generating a distinct or group by expression fails
+   */
+  private List<List<String>> getCommonGroupByDestGroups(QB qb, Operator input)
+      throws SemanticException {
+
+    RowResolver inputRR = opParseCtx.get(input).getRowResolver();
+    QBParseInfo qbp = qb.getParseInfo();
+
+    TreeSet<String> ks = new TreeSet<String>();
+    ks.addAll(qbp.getClauseNames());
+
+    List<List<String>> commonGroupByDestGroups = new ArrayList<List<String>>();
+
+    // If this is a trivial query block return
+    if (ks.size() <= 1) {
+      List<String> oneList = new ArrayList<String>(1);
+      if (ks.size() == 1) {
+        oneList.add(ks.first());
+      }
+      commonGroupByDestGroups.add(oneList);
+      return commonGroupByDestGroups;
+    }
+
+    // sprayKeyLists.get(i) holds the spray keys shared by commonGroupByDestGroups.get(i)
+    List<List<ExprNodeDesc.ExprNodeDescEqualityWrapper>> sprayKeyLists =
+        new ArrayList<List<ExprNodeDesc.ExprNodeDescEqualityWrapper>>(ks.size());
+
+    // Iterate over each clause
+    for (String dest : ks) {
+
+      List<ExprNodeDesc.ExprNodeDescEqualityWrapper> sprayKeys =
+          new ArrayList<ExprNodeDesc.ExprNodeDescEqualityWrapper>();
+
+      // Add the distinct expressions. A failure to generate one is propagated to the caller,
+      // exactly like a failure for a group by expression below.
+      List<ASTNode> distinctAggExprs = qbp.getDistinctFuncExprsForClause(dest);
+      for (ASTNode distinctAggExpr : distinctAggExprs) {
+        // 0 is function name
+        for (int i = 1; i < distinctAggExpr.getChildCount(); i++) {
+          ASTNode parameter = (ASTNode) distinctAggExpr.getChild(i);
+          ExprNodeDesc.ExprNodeDescEqualityWrapper distinctExpr =
+              new ExprNodeDesc.ExprNodeDescEqualityWrapper(genExprNodeDesc(parameter, inputRR));
+          if (!sprayKeys.contains(distinctExpr)) {
+            sprayKeys.add(distinctExpr);
+          }
+        }
+      }
+
+      // Add the group by expressions
+      List<ASTNode> grpByExprs = getGroupByForClause(qbp, dest);
+      for (ASTNode grpByExpr : grpByExprs) {
+        ExprNodeDesc.ExprNodeDescEqualityWrapper grpByExprWrapper =
+            new ExprNodeDesc.ExprNodeDescEqualityWrapper(genExprNodeDesc(grpByExpr, inputRR));
+        if (!sprayKeys.contains(grpByExprWrapper)) {
+          sprayKeys.add(grpByExprWrapper);
+        }
+      }
+
+      // Loop through each of the lists of exprs, looking for a match
+      boolean found = false;
+      for (int i = 0; i < sprayKeyLists.size(); i++) {
+        List<ExprNodeDesc.ExprNodeDescEqualityWrapper> sprayKeyList = sprayKeyLists.get(i);
+
+        // Neither list holds duplicates, so equal sizes plus containment means the same keys
+        if (sprayKeyList.size() == sprayKeys.size() && sprayKeys.containsAll(sprayKeyList)) {
+          // A match was found, so add the clause to the corresponding list
+          commonGroupByDestGroups.get(i).add(dest);
+          found = true;
+          break;
+        }
+      }
+
+      // No match was found, so create new entries
+      if (!found) {
+        sprayKeyLists.add(sprayKeys);
+        List<String> destGroup = new ArrayList<String>();
+        destGroup.add(dest);
+        commonGroupByDestGroups.add(destGroup);
+      }
+    }
+
+    return commonGroupByDestGroups;
+  }
+
+
// see if there are any distinct expressions
private boolean distinctExprsExists(QB qb) {
QBParseInfo qbp = qb.getParseInfo();
@@ -5878,107 +6222,143 @@
curr = genFileSinkPlan(dest, qb, curr);
}
} else {
- // Go over all the destination tables
- for (String dest : ks) {
- curr = input;
+ List> commonGroupByDestGroups = new ArrayList>(ks.size());
- if (qbp.getWhrForClause(dest) != null) {
- curr = genFilterPlan(dest, qb, curr);
- }
+ // If we can put multiple group bys in a single reducer, determine suitable groups of
+ // expressions, otherwise treat all the expressions as a single group
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) {
+ commonGroupByDestGroups = getCommonGroupByDestGroups(qb, curr);
+ } else {
+ commonGroupByDestGroups.add(new ArrayList(ks));
+ }
- if (qbp.getAggregationExprsForClause(dest).size() != 0
- || getGroupByForClause(qbp, dest).size() > 0) {
- //multiple distincts is not supported with skew in data
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
- qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
- throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.
- getMsg());
+ if (!commonGroupByDestGroups.isEmpty()) {
+ for (List commonGroupByDestGroup : commonGroupByDestGroups) {
+ if (commonGroupByDestGroup.isEmpty()) {
+ continue;
}
- // insert a select operator here used by the ColumnPruner to reduce
- // the data to shuffle
- curr = insertSelectAllPlanForGroupBy(dest, curr);
- if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
- if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
- curr = genGroupByPlanMapAggr1MR(dest, qb, curr);
- } else {
- curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
+
+ String firstDest = commonGroupByDestGroup.get(0);
+ if (commonGroupByDestGroup.size() == 1 ||
+ (qbp.getAggregationExprsForClause(firstDest).size() == 0 &&
+ getGroupByForClause(qbp, firstDest).size() == 0) ||
+ conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) ||
+ !conf.getBoolVar(HiveConf.ConfVars.HIVEMULTIGROUPBYSINGLEREDUCER)) {
+
+ // Go over all the destination tables
+ for (String dest : commonGroupByDestGroup) {
+ curr = input;
+
+ if (qbp.getWhrForClause(dest) != null) {
+ curr = genFilterPlan(dest, qb, curr);
+ }
+
+ if (qbp.getAggregationExprsForClause(dest).size() != 0
+ || getGroupByForClause(qbp, dest).size() > 0) {
+ //multiple distincts is not supported with skew in data
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW) &&
+ qbp.getDistinctFuncExprsForClause(dest).size() > 1) {
+ throw new SemanticException(ErrorMsg.UNSUPPORTED_MULTIPLE_DISTINCTS.
+ getMsg());
+ }
+ // insert a select operator here used by the ColumnPruner to reduce
+ // the data to shuffle
+ curr = insertSelectAllPlanForGroupBy(curr);
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE)) {
+ if (!conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+ curr = genGroupByPlanMapAggr1MR(dest, qb, curr);
+ } else {
+ curr = genGroupByPlanMapAggr2MR(dest, qb, curr);
+ }
+ } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
+ curr = genGroupByPlan2MR(dest, qb, curr);
+ } else {
+ curr = genGroupByPlan1MR(dest, qb, curr);
+ }
+ }
+
+ curr = genPostGroupByBodyPlan(curr, dest, qb);
}
- } else if (conf.getBoolVar(HiveConf.ConfVars.HIVEGROUPBYSKEW)) {
- curr = genGroupByPlan2MR(dest, qb, curr);
} else {
- curr = genGroupByPlan1MR(dest, qb, curr);
+ curr = genGroupByPlan1MRMultiReduceGB(commonGroupByDestGroup, qb, input);
}
}
+ }
+ }
- // Insert HAVING plan here
- if (qbp.getHavingForClause(dest) != null) {
- if (getGroupByForClause(qbp, dest).size() == 0) {
- throw new SemanticException("HAVING specified without GROUP BY");
- }
- curr = genHavingPlan(dest, qb, curr);
- }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Created Body Plan for Query Block " + qb.getId());
+ }
- curr = genSelectPlan(dest, qb, curr);
- Integer limit = qbp.getDestLimit(dest);
+ return curr;
+ }
- if (qbp.getClusterByForClause(dest) != null
- || qbp.getDistributeByForClause(dest) != null
- || qbp.getOrderByForClause(dest) != null
- || qbp.getSortByForClause(dest) != null) {
+ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) throws SemanticException {
+ QBParseInfo qbp = qb.getParseInfo();
- int numReducers = -1;
+ // Insert HAVING plan here
+ if (qbp.getHavingForClause(dest) != null) {
+ if (getGroupByForClause(qbp, dest).size() == 0) {
+ throw new SemanticException("HAVING specified without GROUP BY");
+ }
+ curr = genHavingPlan(dest, qb, curr);
+ }
- // Use only 1 reducer if order by is present
- if (qbp.getOrderByForClause(dest) != null) {
- numReducers = 1;
- }
+ curr = genSelectPlan(dest, qb, curr);
+ Integer limit = qbp.getDestLimit(dest);
- curr = genReduceSinkPlan(dest, qb, curr, numReducers);
- }
+ if (qbp.getClusterByForClause(dest) != null
+ || qbp.getDistributeByForClause(dest) != null
+ || qbp.getOrderByForClause(dest) != null
+ || qbp.getSortByForClause(dest) != null) {
- if (qbp.getIsSubQ()) {
- if (limit != null) {
- // In case of order by, only 1 reducer is used, so no need of
- // another shuffle
- curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp
- .getOrderByForClause(dest) != null ? false : true);
- }
- } else {
- curr = genConversionOps(dest, qb, curr);
- // exact limit can be taken care of by the fetch operator
- if (limit != null) {
- boolean extraMRStep = true;
+ int numReducers = -1;
- if (qb.getIsQuery() && qbp.getClusterByForClause(dest) == null
- && qbp.getSortByForClause(dest) == null) {
- extraMRStep = false;
- }
+ // Use only 1 reducer if order by is present
+ if (qbp.getOrderByForClause(dest) != null) {
+ numReducers = 1;
+ }
- curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(),
- extraMRStep);
- qb.getParseInfo().setOuterQueryLimit(limit.intValue());
- }
- curr = genFileSinkPlan(dest, qb, curr);
+ curr = genReduceSinkPlan(dest, qb, curr, numReducers);
+ }
+
+ if (qbp.getIsSubQ()) {
+ if (limit != null) {
+ // In case of order by, only 1 reducer is used, so no need of
+ // another shuffle
+ curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(), qbp
+ .getOrderByForClause(dest) != null ? false : true);
+ }
+ } else {
+ curr = genConversionOps(dest, qb, curr);
+ // exact limit can be taken care of by the fetch operator
+ if (limit != null) {
+ boolean extraMRStep = true;
+
+ if (qb.getIsQuery() && qbp.getClusterByForClause(dest) == null
+ && qbp.getSortByForClause(dest) == null) {
+ extraMRStep = false;
}
- // change curr ops row resolver's tab aliases to query alias if it
- // exists
- if (qb.getParseInfo().getAlias() != null) {
- RowResolver rr = opParseCtx.get(curr).getRowResolver();
- RowResolver newRR = new RowResolver();
- String alias = qb.getParseInfo().getAlias();
- for (ColumnInfo colInfo : rr.getColumnInfos()) {
- String name = colInfo.getInternalName();
- String[] tmp = rr.reverseLookup(name);
- newRR.put(alias, tmp[1], colInfo);
- }
- opParseCtx.get(curr).setRowResolver(newRR);
- }
+ curr = genLimitMapRedPlan(dest, qb, curr, limit.intValue(),
+ extraMRStep);
+ qb.getParseInfo().setOuterQueryLimit(limit.intValue());
}
+ curr = genFileSinkPlan(dest, qb, curr);
}
- if (LOG.isDebugEnabled()) {
- LOG.debug("Created Body Plan for Query Block " + qb.getId());
+ // change curr ops row resolver's tab aliases to query alias if it
+ // exists
+ if (qb.getParseInfo().getAlias() != null) {
+ RowResolver rr = opParseCtx.get(curr).getRowResolver();
+ RowResolver newRR = new RowResolver();
+ String alias = qb.getParseInfo().getAlias();
+ for (ColumnInfo colInfo : rr.getColumnInfos()) {
+ String name = colInfo.getInternalName();
+ String[] tmp = rr.reverseLookup(name);
+ newRR.put(alias, tmp[1], colInfo);
+ }
+ opParseCtx.get(curr).setRowResolver(newRR);
}
return curr;