Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 745281) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -106,6 +106,7 @@ HIVEPARTITIONPRUNER("hive.partition.pruning", "nonstrict"), HIVEALIAS("hive.alias", ""), HIVEMAPSIDEAGGREGATE("hive.map.aggr", "false"), + HIVEGROUPBYSKEW("hive.groupby.skewindata", "true"), HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000), HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float)0.5), Index: ql/src/test/results/clientpositive/groupby2_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby2_noskew.q.out (revision 0) @@ -0,0 +1,77 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_g2)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (TOK_COLREF src key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (TOK_COLREF src value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (TOK_COLREF src key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (TOK_COLREF src value) 5))))) (TOK_GROUPBY (TOK_FUNCTION substr (TOK_COLREF src key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Reduce Output Operator + key expressions: + expr: substr(key, 1, 1) + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: substr(key, 1, 1) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(UDFToDouble(KEY.1)) + expr: count(DISTINCT KEY.1) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + 
expressions: + expr: 0 + type: string + expr: 2 + type: bigint + expr: concat(0, UDFToString(1)) + type: string + Select Operator + expressions: + expr: 0 + type: string + expr: UDFToInteger(1) + type: int + expr: 2 + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_g2 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_g2 + + +0 1 00.0 +1 71 116414.0 +2 69 225571.0 +3 62 332004.0 +4 74 452763.0 +5 6 5397.0 +6 5 6398.0 +7 6 7735.0 +8 8 8762.0 +9 7 91047.0 Index: ql/src/test/results/clientpositive/groupby7_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby7_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby7_noskew.q.out (revision 0) @@ -0,0 +1,739 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF SRC)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST2)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1, Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Reduce Output Operator + key expressions: + expr: key + 
type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(UDFToDouble(VALUE.0)) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: double + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + /data/users/njain/hive4/hive4/build/ql/tmp/464852318/792620405.10002 + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(UDFToDouble(VALUE.0)) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + 
type: string + expr: 1 + type: double + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 
+310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 
+146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 
+409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby2_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby2_map.q.out (revision 745281) +++ ql/src/test/results/clientpositive/groupby2_map.q.out (working copy) @@ -3,8 +3,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -31,8 +30,6 @@ Map-reduce partition columns: expr: 0 type: string - expr: 1 - type: string tag: -1 value expressions: expr: 2 @@ -43,40 +40,6 @@ Group By Operator aggregations: expr: sum(VALUE.0) - expr: count(DISTINCT KEY.1) - keys: - expr: KEY.0 - type: string - mode: partial2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.mapred.SequenceFileOutputFormat - name: binary_table - - Stage: 
Stage-2 - Map Reduce - Alias -> Map Operator Tree: - /data/users/zshao/sync/apache-trunk-HIVE-270/build/ql/tmp/73868446/32430537.10001 - Reduce Output Operator - key expressions: - expr: 0 - type: string - sort order: + - Map-reduce partition columns: - expr: 0 - type: string - tag: -1 - value expressions: - expr: 1 - type: double - expr: 2 - type: bigint - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE.0) expr: count(VALUE.1) keys: expr: KEY.0 Index: ql/src/test/results/clientpositive/groupby6_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby6_map.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby6_map.q.out (revision 0) @@ -0,0 +1,69 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECTDI (TOK_SELEXPR (TOK_FUNCTION substr (TOK_COLREF src value) 5 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Select Operator + expressions: + expr: value + type: string + Group By Operator + keys: + expr: substr(0, 5, 1) + type: string + mode: hash + Reduce Output Operator + key expressions: + expr: 0 + type: string + sort order: + + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 Index: ql/src/test/results/clientpositive/groupby4_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby4_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby4_noskew.q.out (revision 0) @@ -0,0 +1,64 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (TOK_COLREF src key) 1 1))) (TOK_GROUPBY (TOK_FUNCTION substr (TOK_COLREF src key) 1 1)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Select Operator + expressions: + expr: key + type: string + Reduce Output Operator + key expressions: + expr: substr(0, 1, 1) + type: string + sort order: + + Map-reduce partition columns: + expr: substr(0, 1, 1) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 Index: ql/src/test/results/clientpositive/groupby8.q.out =================================================================== --- 
ql/src/test/results/clientpositive/groupby8.q.out (revision 745281) +++ ql/src/test/results/clientpositive/groupby8.q.out (working copy) @@ -1,3 +1,190 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF SRC)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST2)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2, Stage-4 + Stage-3 depends on stages: Stage-1 + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-2, Stage-4 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: substr(value, 5) + type: string + tag: -1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY.1) + keys: + expr: KEY.0 + type: string + mode: partial1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + /data/users/njain/hive4/hive4/build/ql/tmp/40033941/331891271.10002 + Reduce Output Operator + key expressions: + expr: 0 + type: string + sort order: + + 
Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE.0) + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: bigint + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + /data/users/njain/hive4/hive4/build/ql/tmp/40033941/331891271.10003 + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: substr(value, 5) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY.1) + keys: + expr: KEY.0 + type: string + mode: partial1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + 
/data/users/njain/hive4/hive4/build/ql/tmp/40033941/331891271.10004 + Reduce Output Operator + key expressions: + expr: 0 + type: string + sort order: + + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE.0) + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: bigint + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + 0 1 10 1 100 1 Index: ql/src/test/results/clientpositive/groupby1_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby1_noskew.q.out (revision 0) @@ -0,0 +1,372 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_g1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF src key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (TOK_COLREF src value) 5)))) (TOK_GROUPBY (TOK_COLREF src key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(UDFToDouble(VALUE.0)) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + 
expressions: + expr: 0 + type: string + expr: 1 + type: double + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: 1 + type: double + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_g1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_g1 + + +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 
819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 
196.0 Index: ql/src/test/results/clientpositive/groupby3_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3_map.q.out (revision 745281) +++ ql/src/test/results/clientpositive/groupby3_map.q.out (working copy) @@ -3,8 +3,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -31,9 +30,6 @@ expr: 0 type: string sort order: + - Map-reduce partition columns: - expr: 0 - type: string tag: -1 value expressions: expr: 1 @@ -51,40 +47,6 @@ aggregations: expr: sum(VALUE.0) expr: max(VALUE.1) - expr: avg(DISTINCT UDFToDouble(KEY.0)) - expr: min(VALUE.3) - expr: avg(VALUE.4) - mode: partial2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.mapred.SequenceFileOutputFormat - name: binary_table - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - /data/users/zshao/sync/apache-trunk-HIVE-270/build/ql/tmp/793761293/734762737.10001 - Reduce Output Operator - sort order: - tag: -1 - value expressions: - expr: 0 - type: double - expr: 1 - type: string - expr: 2 - type: string - expr: 3 - type: string - expr: 4 - type: string - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE.0) - expr: max(VALUE.1) expr: avg(VALUE.2) expr: min(VALUE.3) expr: avg(VALUE.4) Index: ql/src/test/results/clientpositive/groupby7_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby7_map.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby7_map.q.out (revision 0) @@ -0,0 +1,753 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF SRC)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION 
SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST2)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1, Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Group By Operator + aggregations: + expr: sum(UDFToDouble(substr(value, 5))) + keys: + expr: key + type: string + mode: hash + Reduce Output Operator + key expressions: + expr: 0 + type: string + sort order: + + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 1 + type: double + Group By Operator + aggregations: + expr: sum(UDFToDouble(substr(value, 5))) + keys: + expr: key + type: string + mode: hash + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE.0) + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: double + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + /data/users/njain/hive4/hive4/build/ql/tmp/387369318/573737442.10002 + Reduce Output Operator + key expressions: + expr: 0 + type: string + sort order: + + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 1 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(VALUE.0) + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: double + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 
400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 
477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 
291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby6_noskew.q.out =================================================================== --- 
ql/src/test/results/clientpositive/groupby6_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby6_noskew.q.out (revision 0) @@ -0,0 +1,64 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECTDI (TOK_SELEXPR (TOK_FUNCTION substr (TOK_COLREF src value) 5 1))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Select Operator + expressions: + expr: value + type: string + Reduce Output Operator + key expressions: + expr: substr(0, 5, 1) + type: string + sort order: + + Map-reduce partition columns: + expr: substr(0, 5, 1) + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 Index: ql/src/test/results/clientpositive/groupby3_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby3_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby3_noskew.q.out (revision 0) @@ -0,0 +1,75 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (TOK_COLREF src value) 5))) (TOK_SELEXPR 
(TOK_FUNCTION avg (TOK_FUNCTION substr (TOK_COLREF src value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI avg (TOK_FUNCTION substr (TOK_COLREF src value) 5))) (TOK_SELEXPR (TOK_FUNCTION max (TOK_FUNCTION substr (TOK_COLREF src value) 5))) (TOK_SELEXPR (TOK_FUNCTION min (TOK_FUNCTION substr (TOK_COLREF src value) 5)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Select Operator + expressions: + expr: value + type: string + Reduce Output Operator + key expressions: + expr: substr(0, 5) + type: string + sort order: + + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(UDFToDouble(KEY.0)) + expr: max(KEY.0) + expr: avg(DISTINCT UDFToDouble(KEY.0)) + expr: min(KEY.0) + expr: avg(KEY.0) + mode: complete + Select Operator + expressions: + expr: 0 + type: double + expr: 4 + type: double + expr: 2 + type: double + expr: 1 + type: string + expr: 3 + type: string + Select Operator + expressions: + expr: 0 + type: double + expr: 1 + type: double + expr: 2 + type: double + expr: UDFToDouble(3) + type: double + expr: UDFToDouble(4) + type: double + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + +130091.0 260.182 256.10355987055016 98.0 0.0 Index: ql/src/test/results/clientpositive/groupby4_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby4_map.q.out (revision 745281) +++ 
ql/src/test/results/clientpositive/groupby4_map.q.out (working copy) @@ -17,9 +17,6 @@ mode: hash Reduce Output Operator sort order: - Map-reduce partition columns: - expr: rand() - type: double tag: -1 value expressions: expr: 0 Index: ql/src/test/results/clientpositive/groupby8_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby8_map.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby8_map.q.out (revision 0) @@ -0,0 +1,761 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF SRC)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST2)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1, Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + Reduce Output Operator + key expressions: + expr: 0 + type: string + expr: 1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 2 + type: bigint + Group By Operator + aggregations: + expr: count(DISTINCT substr(value, 5)) + keys: + expr: key + type: string + expr: substr(value, 5) + type: string + mode: hash + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE.0) + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: bigint + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + /data/users/njain/hive4/hive4/build/ql/tmp/266935708/1076076860.10002 + Reduce Output Operator + key expressions: + expr: 0 + type: string + expr: 1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: 0 + type: string + tag: -1 + value expressions: + expr: 2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE.0) + keys: + expr: KEY.0 + type: string + mode: final + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: bigint + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 
1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 
+475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 Index: ql/src/test/results/clientpositive/groupby8_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby8_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby8_noskew.q.out (revision 0) @@ -0,0 +1,737 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF SRC)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB DEST2)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF SRC key)) (TOK_SELEXPR (TOK_FUNCTIONDI COUNT (TOK_FUNCTION SUBSTR (TOK_COLREF SRC value) 5)))) (TOK_GROUPBY (TOK_COLREF SRC key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1, Stage-2 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-1, Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + tag: -1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.mapred.SequenceFileOutputFormat + name: binary_table + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY.1) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + type: string + expr: 
1 + type: bigint + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + /data/users/njain/hive4/hive4/build/ql/tmp/496809175/118762350.10002 + Reduce Output Operator + key expressions: + expr: key + type: string + expr: substr(value, 5) + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(DISTINCT KEY.1) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: bigint + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest2 + + +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 
1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 
+120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 
1 +98 1 Index: ql/src/test/results/clientpositive/groupby5_noskew.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby5_noskew.q.out (revision 0) +++ ql/src/test/results/clientpositive/groupby5_noskew.q.out (revision 0) @@ -0,0 +1,372 @@ +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest1)) (TOK_SELECT (TOK_SELEXPR (TOK_COLREF src key)) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (TOK_COLREF src value) 5)))) (TOK_GROUPBY (TOK_COLREF src key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + value expressions: + expr: substr(value, 5) + type: string + Reduce Operator Tree: + Group By Operator + aggregations: + expr: sum(UDFToDouble(VALUE.0)) + keys: + expr: KEY.0 + type: string + mode: complete + Select Operator + expressions: + expr: 0 + type: string + expr: 1 + type: double + Select Operator + expressions: + expr: UDFToInteger(0) + type: int + expr: UDFToString(1) + type: string + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest1 + + +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 
250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 
790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 Index: ql/src/test/results/clientpositive/groupby1_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby1_map.q.out (revision 745281) +++ ql/src/test/results/clientpositive/groupby1_map.q.out (working copy) @@ -3,8 +3,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-0 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -24,8 +23,8 @@ type: string sort order: + Map-reduce partition columns: - expr: rand() - type: double + expr: 0 + type: string tag: -1 value expressions: expr: 1 @@ -37,37 +36,6 @@ keys: expr: KEY.0 type: string - mode: partial2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: 
org.apache.hadoop.mapred.SequenceFileOutputFormat - name: binary_table - - Stage: Stage-2 - Map Reduce - Alias -> Map Operator Tree: - /data/users/zshao/sync/apache-trunk-HIVE-270/build/ql/tmp/623301799/541321831.10001 - Reduce Output Operator - key expressions: - expr: 0 - type: string - sort order: + - Map-reduce partition columns: - expr: 0 - type: string - tag: -1 - value expressions: - expr: 1 - type: double - Reduce Operator Tree: - Group By Operator - aggregations: - expr: sum(VALUE.0) - keys: - expr: KEY.0 - type: string mode: final Select Operator expressions: Index: ql/src/test/results/clientpositive/groupby5_map.q.out =================================================================== --- ql/src/test/results/clientpositive/groupby5_map.q.out (revision 745281) +++ ql/src/test/results/clientpositive/groupby5_map.q.out (working copy) @@ -20,9 +20,6 @@ mode: hash Reduce Output Operator sort order: - Map-reduce partition columns: - expr: rand() - type: double tag: -1 value expressions: expr: 0 Index: ql/src/test/queries/clientpositive/groupby7_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby7_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby7_noskew.q (revision 0) @@ -0,0 +1,20 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +SET hive.exec.compress.intermediate=true; +SET hive.exec.compress.output=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +SELECT DEST1.* FROM DEST1; +SELECT 
DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby2_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_map.q (revision 745281) +++ ql/src/test/queries/clientpositive/groupby2_map.q (working copy) @@ -1,4 +1,5 @@ set hive.map.aggr=true; +set mapred.reduce.tasks=31; CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/groupby6_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby6_map.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby6_map.q (revision 0) @@ -0,0 +1,15 @@ +set hive.map.aggr=true; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1); + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1); + +SELECT dest1.* FROM dest1; + + Index: ql/src/test/queries/clientpositive/groupby8_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby8_noskew.q (revision 0) @@ -0,0 +1,18 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + Index: 
ql/src/test/queries/clientpositive/groupby1_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby1_noskew.q (revision 0) @@ -0,0 +1,11 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key; + +FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key; + +SELECT dest_g1.* FROM dest_g1; Property changes on: ql/src/test/queries/clientpositive/groupby1_noskew.q ___________________________________________________________________ Added: svn:executable + * Index: ql/src/test/queries/clientpositive/groupby1_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby1_map.q (revision 745281) +++ ql/src/test/queries/clientpositive/groupby1_map.q (working copy) @@ -1,4 +1,5 @@ set hive.map.aggr=true; +set mapred.reduce.tasks=31; CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/groupby5_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby5_map.q (revision 745281) +++ ql/src/test/queries/clientpositive/groupby5_map.q (working copy) @@ -1,4 +1,5 @@ set hive.map.aggr=true; +set mapred.reduce.tasks=31; CREATE TABLE dest1(key INT) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/groupby2_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby2_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby2_noskew.q (revision 0) @@ -0,0 +1,13 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest_g2(key STRING, c1 INT, 
c2 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1); + +FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1); + +SELECT dest_g2.* FROM dest_g2; Property changes on: ql/src/test/queries/clientpositive/groupby2_noskew.q ___________________________________________________________________ Added: svn:executable + * Index: ql/src/test/queries/clientpositive/groupby3_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby3_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby3_noskew.q (revision 0) @@ -0,0 +1,13 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), avg(substr(src.value,5)), avg(DISTINCT substr(src.value,5)), max(substr(src.value,5)), min(substr(src.value,5)); + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), avg(substr(src.value,5)), avg(DISTINCT substr(src.value,5)), max(substr(src.value,5)), min(substr(src.value,5)); + +SELECT dest1.* FROM dest1; Property changes on: ql/src/test/queries/clientpositive/groupby3_noskew.q ___________________________________________________________________ Added: svn:executable + * Index: ql/src/test/queries/clientpositive/groupby4_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby4_map.q (revision 745281) +++ ql/src/test/queries/clientpositive/groupby4_map.q (working copy) @@ -1,4 +1,5 @@ set hive.map.aggr=true; +set mapred.reduce.tasks=31; CREATE 
TABLE dest1(key INT) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/groupby8_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8_map.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby8_map.q (revision 0) @@ -0,0 +1,18 @@ +set hive.map.aggr=true; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; + Index: ql/src/test/queries/clientpositive/groupby4_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby4_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby4_noskew.q (revision 0) @@ -0,0 +1,14 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1); + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1); + +SELECT dest1.* FROM dest1; + Property changes on: ql/src/test/queries/clientpositive/groupby4_noskew.q ___________________________________________________________________ Added: svn:executable + * Index: ql/src/test/queries/clientpositive/groupby8.q =================================================================== --- ql/src/test/queries/clientpositive/groupby8.q (revision 745281) +++ 
ql/src/test/queries/clientpositive/groupby8.q (working copy) @@ -1,10 +1,15 @@ CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; +EXPLAIN FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key; +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key; + SELECT DEST1.* FROM DEST1; SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby5_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby5_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby5_noskew.q (revision 0) @@ -0,0 +1,18 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE; + +EXPLAIN +INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key; + +INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key; + +SELECT dest1.* FROM dest1; + Property changes on: ql/src/test/queries/clientpositive/groupby5_noskew.q ___________________________________________________________________ Added: svn:executable + * Index: ql/src/test/queries/clientpositive/groupby3_map.q =================================================================== --- ql/src/test/queries/clientpositive/groupby3_map.q (revision 745281) +++ ql/src/test/queries/clientpositive/groupby3_map.q (working copy) @@ -1,4 +1,5 @@ set hive.map.aggr=true; +set mapred.reduce.tasks=31; CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE) STORED AS TEXTFILE; Index: ql/src/test/queries/clientpositive/groupby7_map.q 
=================================================================== --- ql/src/test/queries/clientpositive/groupby7_map.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby7_map.q (revision 0) @@ -0,0 +1,20 @@ +set hive.map.aggr=true; +set mapred.reduce.tasks=31; + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE; +CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE; + +SET hive.exec.compress.intermediate=true; +SET hive.exec.compress.output=true; + +EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key; + +SELECT DEST1.* FROM DEST1; +SELECT DEST2.* FROM DEST2; Index: ql/src/test/queries/clientpositive/groupby6_noskew.q =================================================================== --- ql/src/test/queries/clientpositive/groupby6_noskew.q (revision 0) +++ ql/src/test/queries/clientpositive/groupby6_noskew.q (revision 0) @@ -0,0 +1,15 @@ +set hive.groupby.skewindata=false; +set mapred.reduce.tasks=31; + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE; + +EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1); + +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1); + +SELECT dest1.* FROM dest1; + + Property changes on: ql/src/test/queries/clientpositive/groupby6_noskew.q ___________________________________________________________________ Added: svn:executable + * Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 745281) +++ 
ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -1554,94 +1554,6 @@ } /** - * Generate the ReduceSinkOperator for the Group By Query Block (parseInfo.getXXX(dest)). - * The new ReduceSinkOperator will be a child of inputOperatorInfo. - * - * It will put all Group By keys and the distinct field (if any) in the map-reduce sort key, - * and all other fields in the map-reduce value. - * - * The map-reduce partition key will be random() if there is no distinct, or the same as - * the map-reduce sort key otherwise. - * - * @return the new ReduceSinkOperator. - * @throws SemanticException - */ - @SuppressWarnings("nls") - private Operator genGroupByPlanReduceSinkOperator(QBParseInfo parseInfo, - String dest, Operator inputOperatorInfo) - throws SemanticException { - - return genGroupByPlanReduceSinkOperator(parseInfo, dest, inputOperatorInfo, -1); - } - - @SuppressWarnings("nls") - private Operator genGroupByPlanReduceSinkOperator(QBParseInfo parseInfo, - String dest, Operator inputOperatorInfo, int numReducers) - throws SemanticException { - RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); - RowResolver reduceSinkOutputRowResolver = new RowResolver(); - reduceSinkOutputRowResolver.setIsExprResolver(true); - ArrayList reduceKeys = new ArrayList(); - - // Pre-compute group-by keys and store in reduceKeys - List grpByExprs = getGroupByForClause(parseInfo, dest); - for (int i = 0; i < grpByExprs.size(); ++i) { - ASTNode grpbyExpr = grpByExprs.get(i); - String text = grpbyExpr.toStringTree(); - - if (reduceSinkOutputRowResolver.get("", text) == null) { - ColumnInfo exprInfo = reduceSinkInputRowResolver.get("", text); - reduceKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName())); - reduceSinkOutputRowResolver.put("", text, - new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." 
+ Integer.valueOf(reduceKeys.size() - 1).toString(), - exprInfo.getType())); - } - } - - // If there is a distinctFuncExp, add all parameters to the reduceKeys. - if (parseInfo.getDistinctFuncExprForClause(dest) != null) { - ASTNode value = parseInfo.getDistinctFuncExprForClause(dest); - // 0 is function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode parameter = (ASTNode) value.getChild(i); - String text = parameter.toStringTree(); - if (reduceSinkOutputRowResolver.get("",text) == null) { - ColumnInfo exprInfo = reduceSinkInputRowResolver.get("", text); - reduceKeys.add(new exprNodeColumnDesc(exprInfo.getType(), exprInfo.getInternalName())); - reduceSinkOutputRowResolver.put("", text, - new ColumnInfo(Utilities.ReduceField.KEY.toString() + "." + Integer.valueOf(reduceKeys.size() - 1).toString(), - exprInfo.getType())); - } - } - } - - // Put partial aggregation results in reduceValues - ArrayList reduceValues = new ArrayList(); - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - int inputField = reduceKeys.size(); - - for (Map.Entry entry : aggregationTrees.entrySet()) { - - TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType(); - reduceValues.add(new exprNodeColumnDesc( - type, (Integer.valueOf(inputField)).toString())); - inputField++; - reduceSinkOutputRowResolver.put("", ((ASTNode)entry.getValue()).toStringTree(), - new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + (Integer.valueOf(reduceValues.size()-1)).toString(), - type)); - } - - return putOpInsertMap( - OperatorFactory.getAndMakeChild( - PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, -1, - (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 : Integer.MAX_VALUE), numReducers), - new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), - inputOperatorInfo), - reduceSinkOutputRowResolver); - } - - /** * Generate the ReduceSinkOperator for the Group By Query Block (qb.getPartInfo().getXXX(dest)). 
* The new ReduceSinkOperator will be a child of inputOperatorInfo. * @@ -1655,7 +1567,8 @@ */ @SuppressWarnings("nls") private Operator genGroupByPlanReduceSinkOperator(QB qb, - String dest, Operator inputOperatorInfo, int numPartitionFields) throws SemanticException { + String dest, Operator inputOperatorInfo, int numPartitionFields, int numReducers, boolean mapAggrDone) throws SemanticException { + RowResolver reduceSinkInputRowResolver = opParseCtx.get(inputOperatorInfo).getRR(); QBParseInfo parseInfo = qb.getParseInfo(); RowResolver reduceSinkOutputRowResolver = new RowResolver(); @@ -1693,28 +1606,44 @@ } } - // Put parameters to aggregations in reduceValues ArrayList reduceValues = new ArrayList(); - HashMap aggregationTrees = parseInfo - .getAggregationExprsForClause(dest); - for (Map.Entry entry : aggregationTrees.entrySet()) { + HashMap aggregationTrees = parseInfo.getAggregationExprsForClause(dest); + + if (!mapAggrDone) { + // Put parameters to aggregations in reduceValues + for (Map.Entry entry : aggregationTrees.entrySet()) { ASTNode value = entry.getValue(); - // 0 is function name - for (int i = 1; i < value.getChildCount(); i++) { - ASTNode parameter = (ASTNode) value.getChild(i); - String text = parameter.toStringTree(); - if (reduceSinkOutputRowResolver.get("",text) == null) { - reduceValues.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); - reduceSinkOutputRowResolver.put("", text, - new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." 
+ Integer.valueOf(reduceValues.size() - 1).toString(), - reduceValues.get(reduceValues.size()-1).getTypeInfo())); + // 0 is function name + for (int i = 1; i < value.getChildCount(); i++) { + ASTNode parameter = (ASTNode) value.getChild(i); + String text = parameter.toStringTree(); + if (reduceSinkOutputRowResolver.get("",text) == null) { + reduceValues.add(genExprNodeDesc(parameter, reduceSinkInputRowResolver)); + reduceSinkOutputRowResolver.put("", text, + new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + Integer.valueOf(reduceValues.size() - 1).toString(), + reduceValues.get(reduceValues.size()-1).getTypeInfo())); + } } } } + else + { + // Put partial aggregation results in reduceValues + int inputField = reduceKeys.size(); + + for (Map.Entry entry : aggregationTrees.entrySet()) { + + TypeInfo type = reduceSinkInputRowResolver.getColumnInfos().get(inputField).getType(); + reduceValues.add(new exprNodeColumnDesc(type, (Integer.valueOf(inputField)).toString())); + inputField++; + reduceSinkOutputRowResolver.put("", ((ASTNode)entry.getValue()).toStringTree(), + new ColumnInfo(Utilities.ReduceField.VALUE.toString() + "." + (Integer.valueOf(reduceValues.size()-1)).toString(), + type)); + } + } return putOpInsertMap( - OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, -1, numPartitionFields, - -1), + OperatorFactory.getAndMakeChild(PlanUtils.getReduceSinkDesc(reduceKeys, reduceValues, -1, numPartitionFields, numReducers), new RowSchema(reduceSinkOutputRowResolver.getColumnInfos()), inputOperatorInfo), reduceSinkOutputRowResolver @@ -1874,9 +1803,8 @@ // ////// 1. Generate ReduceSinkOperator Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( - qb, dest, input, getGroupByForClause(parseInfo, dest).size()); + qb, dest, input, getGroupByForClause(parseInfo, dest).size(), -1, false); - // ////// 2. 
Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.COMPLETE); @@ -1914,7 +1842,7 @@ // captured by WritableComparableHiveObject.hashCode() function. Operator reduceSinkOperatorInfo = genGroupByPlanReduceSinkOperator( qb, dest, input, (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 - : Integer.MAX_VALUE)); + : Integer.MAX_VALUE), -1, false); // ////// 2. Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, @@ -1951,7 +1879,7 @@ * Evaluate partial aggregates first, followed by actual aggregates. */ @SuppressWarnings("nls") - private Operator genGroupByPlan4MR(String dest, QB qb, + private Operator genGroupByPlanMapAggr1MR(String dest, QB qb, Operator inputOperatorInfo) throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); @@ -1960,13 +1888,46 @@ Operator groupByOperatorInfo = genGroupByPlanMapGroupByOperator(qb, dest, inputOperatorInfo, groupByDesc.Mode.HASH); + int numReducers = -1; + + // Optimize the scenario when there are no grouping keys and no distinct - only 1 reducer is needed + // For eg: select count(1) from T where t.ds = .... + if (!optimizeMapAggrGroupBy(dest, qb)) + numReducers = 1; + + // ////// Generate ReduceSink Operator + Operator reduceSinkOperatorInfo = + genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest).size(), numReducers, true); + + return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.FINAL); + } + + /** + * Generate a Group-By plan using a 2 map-reduce jobs. First perform a map + * side partial aggregation (to reduce the amount of data). Then spray by + * the distinct key (or a random number) in hope of getting a uniform + * distribution, and compute partial aggregates grouped by that distinct key. + * Evaluate partial aggregates first, followed by actual aggregates. 
+ */ + @SuppressWarnings("nls") + private Operator genGroupByPlanMapAggr2MR(String dest, QB qb, + Operator inputOperatorInfo) throws SemanticException { + + QBParseInfo parseInfo = qb.getParseInfo(); + + // ////// Generate GroupbyOperator for a map-side partial aggregation + Operator groupByOperatorInfo = genGroupByPlanMapGroupByOperator(qb, + dest, inputOperatorInfo, groupByDesc.Mode.HASH); + // Optimize the scenario when there are no grouping keys and no distinct - 2 map-reduce jobs are not needed // For eg: select count(1) from T where t.ds = .... if (!optimizeMapAggrGroupBy(dest, qb)) { // ////// Generate ReduceSink Operator Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(parseInfo, dest, groupByOperatorInfo); + genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, + (parseInfo.getDistinctFuncExprForClause(dest) == null ? -1 + : Integer.MAX_VALUE), -1, true); // ////// Generate GroupbyOperator for a partial aggregation Operator groupByOperatorInfo2 = genGroupByPlanGroupByOperator1(parseInfo, dest, reduceSinkOperatorInfo, @@ -1982,7 +1943,7 @@ else { // ////// Generate ReduceSink Operator Operator reduceSinkOperatorInfo = - genGroupByPlanReduceSinkOperator(parseInfo, dest, groupByOperatorInfo, 1); + genGroupByPlanReduceSinkOperator(qb, dest, groupByOperatorInfo, getGroupByForClause(parseInfo, dest).size(), 1, true); return genGroupByPlanGroupByOperator2MR(parseInfo, dest, reduceSinkOperatorInfo, groupByDesc.Mode.FINAL); } @@ -2739,9 +2700,11 @@ || getGroupByForClause(qbp, dest).size() > 0) { if (conf.getVar(HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE).equalsIgnoreCase("true")) - curr = genGroupByPlan4MR(dest, qb, curr); - else + curr = genGroupByPlanMapAggr1MR(dest, qb, curr); + else if (conf.getVar(HiveConf.ConfVars.HIVEGROUPBYSKEW).equalsIgnoreCase("true")) curr = genGroupByPlan2MR(dest, qb, curr); + else + curr = genGroupByPlan1MR(dest, qb, curr); } curr = genSelectPlan(dest, qb, curr); Index: 
ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFAvg.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFAvg.java (revision 745281) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDAFAvg.java (working copy) @@ -54,9 +54,15 @@ public boolean merge(String o) { if (o != null && !o.isEmpty()) { int pos = o.indexOf('/'); - assert(pos != -1); - mSum += Double.parseDouble(o.substring(0, pos)); - mCount += Long.parseLong(o.substring(pos+1)); + // In case merge is called without an iterate, the input has no '/' separator: treat it as a single raw value (add it to the sum, count it once) + if (pos == -1) { + mSum += Double.parseDouble(o); + mCount ++; + } + else { + mSum += Double.parseDouble(o.substring(0, pos)); + mCount += Long.parseLong(o.substring(pos+1)); + } } return true; }