diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java index 8500b21..bb5dd79 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/SparkWork.java @@ -25,6 +25,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.LinkedHashMap; import java.util.List; @@ -47,8 +48,8 @@ private static int counter; private final String name; - private final Set roots = new HashSet(); - private final Set leaves = new HashSet(); + private final Set roots = new LinkedHashSet(); + private final Set leaves = new LinkedHashSet<>(); protected final Map> workGraph = new HashMap>(); @@ -182,14 +183,14 @@ public void disconnect(BaseWork a, BaseWork b) { * getRoots returns all nodes that do not have a parent. */ public Set getRoots() { - return new HashSet(roots); + return new LinkedHashSet(roots); } /** * getLeaves returns all nodes that do not have a child */ public Set getLeaves() { - return new HashSet(leaves); + return new LinkedHashSet(leaves); } public void setRequiredCounterPrefix(Map> requiredCounterPrefix) { diff --git ql/src/test/queries/clientpositive/union_remove_22.q ql/src/test/queries/clientpositive/union_remove_22.q index 96f4fe6..d2c22e0 100644 --- ql/src/test/queries/clientpositive/union_remove_22.q +++ ql/src/test/queries/clientpositive/union_remove_22.q @@ -7,6 +7,8 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS + -- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, some columns are repeated. So, union cannot be removed. diff --git ql/src/test/queries/clientpositive/union_remove_6_subq.q ql/src/test/queries/clientpositive/union_remove_6_subq.q index 592d523..b0c0539 100644 --- ql/src/test/queries/clientpositive/union_remove_6_subq.q +++ ql/src/test/queries/clientpositive/union_remove_6_subq.q @@ -6,6 +6,8 @@ set hive.merge.mapfiles=false; set hive.merge.mapredfiles=false; set mapred.input.dir.recursive=true; +-- SORT_QUERY_RESULTS + -- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. diff --git ql/src/test/results/clientpositive/spark/union_remove_22.q.out ql/src/test/results/clientpositive/spark/union_remove_22.q.out index d1cd3f8..8fe568a 100644 --- ql/src/test/results/clientpositive/spark/union_remove_22.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_22.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, some columns are repeated. So, union cannot be removed. -- It does not matter, whether the output is merged or not. In this case, merging is turned @@ -12,7 +14,9 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, some columns are repeated. So, union cannot be removed. -- It does not matter, whether the output is merged or not. In this case, merging is turned @@ -227,16 +231,16 @@ POSTHOOK: query: select * from outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### -8 2 2 -2 1 1 1 1 1 -3 1 1 -7 1 1 1 1 1 +2 1 1 +2 1 1 +3 1 1 3 1 1 7 1 1 +7 1 1 +8 2 2 8 2 2 -2 1 1 PREHOOK: query: explain insert overwrite table outputTbl1 SELECT a.key, concat(a.`values`, a.`values`), concat(a.`values`, a.`values`) diff --git ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out index a768c4c..7e7b867 100644 --- ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out +++ ql/src/test/results/clientpositive/spark/union_remove_6_subq.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -9,7 +11,9 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -557,132 +561,282 @@ WINDOW w AS (PARTITION BY key ORDER BY c ROWS UNBOUNDED PRECEDING) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0 3.0 +0 4.5 10 1.0 10 1.5 100 2.0 100 3.0 +103 2.0 +103 3.0 104 2.0 104 3.0 +105 1.0 +105 1.5 +11 1.0 +11 1.5 111 1.0 111 1.5 113 2.0 113 3.0 +114 1.0 +114 1.5 +116 1.0 +116 1.5 +118 2.0 +118 3.0 119 3.0 119 4.5 12 2.0 12 3.0 120 2.0 120 3.0 +125 2.0 +125 3.0 126 1.0 126 1.5 128 3.0 128 4.5 +129 2.0 +129 3.0 131 1.0 131 1.5 133 1.0 133 1.5 +134 2.0 +134 3.0 +136 1.0 +136 1.5 137 2.0 137 3.0 +138 4.0 +138 6.0 +143 1.0 +143 1.5 +145 1.0 +145 1.5 146 2.0 146 3.0 +149 2.0 +149 3.0 +15 2.0 +15 3.0 +150 1.0 +150 1.5 +152 2.0 +152 3.0 153 1.0 153 1.5 155 1.0 155 1.5 +156 1.0 +156 1.5 157 1.0 157 1.5 +158 1.0 +158 1.5 160 1.0 160 1.5 162 1.0 162 1.5 +163 1.0 +163 1.5 164 2.0 164 3.0 +165 2.0 +165 3.0 166 1.0 166 1.5 +167 3.0 +167 4.5 168 1.0 168 1.5 +169 4.0 +169 6.0 +17 1.0 +17 1.5 +170 1.0 +170 1.5 +172 2.0 +172 3.0 +174 2.0 +174 3.0 175 2.0 175 3.0 +176 2.0 +176 3.0 177 1.0 177 1.5 +178 1.0 +178 1.5 179 2.0 179 3.0 18 2.0 18 3.0 180 1.0 180 1.5 +181 1.0 +181 1.5 +183 1.0 +183 1.5 186 1.0 186 1.5 +187 3.0 +187 4.5 +189 1.0 +189 1.5 +19 1.0 +19 1.5 +190 1.0 +190 1.5 191 2.0 191 3.0 +192 1.0 +192 1.5 193 3.0 193 4.5 +194 1.0 +194 1.5 195 2.0 195 3.0 +196 1.0 +196 1.5 197 2.0 197 3.0 199 3.0 199 4.5 +2 1.0 +2 1.5 +20 1.0 +20 1.5 +200 2.0 +200 3.0 201 1.0 201 1.5 +202 1.0 +202 1.5 203 2.0 203 3.0 205 2.0 205 3.0 207 2.0 207 3.0 +208 3.0 +208 4.5 209 2.0 209 3.0 +213 2.0 +213 3.0 214 1.0 214 1.5 216 2.0 216 3.0 +217 2.0 +217 3.0 218 1.0 218 1.5 +219 2.0 +219 3.0 221 2.0 221 3.0 +222 1.0 +222 1.5 223 2.0 223 3.0 +224 2.0 +224 3.0 +226 1.0 +226 1.5 +228 1.0 +228 1.5 229 2.0 229 3.0 230 5.0 230 7.5 +233 2.0 +233 3.0 +235 1.0 +235 1.5 +237 2.0 +237 3.0 238 2.0 238 3.0 +239 2.0 +239 3.0 +24 2.0 +24 3.0 241 1.0 241 1.5 +242 2.0 +242 3.0 +244 1.0 +244 1.5 247 1.0 247 1.5 +248 1.0 +248 1.5 249 1.0 249 1.5 252 1.0 252 1.5 +255 2.0 +255 3.0 256 2.0 256 3.0 +257 1.0 +257 1.5 258 1.0 258 1.5 +26 2.0 +26 3.0 +260 1.0 +260 1.5 +262 1.0 +262 1.5 263 1.0 263 1.5 265 2.0 265 3.0 +266 1.0 +266 1.5 27 1.0 27 1.5 272 2.0 272 3.0 +273 3.0 +273 4.5 274 1.0 274 1.5 +275 1.0 +275 1.5 +277 4.0 +277 6.0 278 2.0 278 3.0 +28 1.0 +28 1.5 +280 2.0 +280 3.0 281 2.0 281 3.0 +282 2.0 +282 3.0 283 1.0 283 1.5 +284 1.0 +284 1.5 285 1.0 285 1.5 +286 1.0 +286 1.5 287 1.0 287 1.5 +288 2.0 +288 3.0 289 1.0 289 1.5 +291 1.0 +291 1.5 292 1.0 292 1.5 296 1.0 @@ -693,54 +847,102 @@ POSTHOOK: Input: default@src 30 1.5 302 1.0 302 1.5 +305 1.0 +305 1.5 306 1.0 306 1.5 +307 2.0 +307 3.0 308 1.0 308 1.5 +309 2.0 +309 3.0 +310 1.0 +310 1.5 311 3.0 311 4.5 315 1.0 315 1.5 +316 3.0 +316 4.5 317 2.0 317 3.0 +318 3.0 +318 4.5 +321 2.0 +321 3.0 322 2.0 322 3.0 +323 1.0 +323 1.5 +325 2.0 +325 3.0 +327 3.0 +327 4.5 +33 1.0 +33 1.5 331 2.0 331 3.0 +332 1.0 +332 1.5 333 2.0 333 3.0 335 1.0 335 1.5 +336 1.0 +336 1.5 +338 1.0 +338 1.5 339 1.0 339 1.5 34 1.0 34 1.5 +341 1.0 +341 1.5 342 2.0 342 3.0 344 2.0 344 3.0 +345 1.0 +345 1.5 348 5.0 348 7.5 +35 3.0 +35 4.5 351 1.0 351 1.5 353 2.0 353 3.0 +356 1.0 +356 1.5 360 1.0 360 1.5 362 1.0 362 1.5 364 1.0 364 1.5 +365 1.0 +365 1.5 366 1.0 366 1.5 +367 2.0 +367 3.0 368 1.0 368 1.5 +369 3.0 +369 4.5 +37 2.0 +37 3.0 373 1.0 373 1.5 +374 1.0 +374 1.5 375 1.0 375 1.5 377 1.0 377 1.5 +378 1.0 +378 1.5 379 1.0 379 1.5 382 2.0 @@ -749,30 +951,60 @@ POSTHOOK: Input: default@src 384 4.5 386 1.0 386 1.5 +389 1.0 +389 1.5 +392 1.0 +392 1.5 393 1.0 393 1.5 +394 1.0 +394 1.5 395 2.0 395 3.0 +396 3.0 +396 4.5 397 2.0 397 3.0 399 2.0 399 3.0 +4 1.0 +4 1.5 +400 1.0 +400 1.5 401 5.0 401 7.5 +402 1.0 +402 1.5 403 3.0 403 4.5 +404 2.0 +404 3.0 +406 4.0 +406 6.0 407 1.0 407 1.5 409 3.0 409 4.5 41 1.0 41 1.5 +411 1.0 +411 1.5 +413 2.0 +413 3.0 414 2.0 414 3.0 +417 3.0 +417 4.5 418 1.0 418 1.5 +419 1.0 +419 1.5 +42 2.0 +42 3.0 421 1.0 421 1.5 +424 2.0 +424 3.0 427 1.0 427 1.5 429 2.0 @@ -781,26 +1013,58 @@ POSTHOOK: Input: default@src 43 1.5 430 3.0 430 4.5 +431 3.0 +431 4.5 432 1.0 432 1.5 +435 1.0 +435 1.5 436 1.0 436 1.5 +437 1.0 +437 1.5 438 3.0 438 4.5 +439 2.0 +439 3.0 +44 1.0 +44 1.5 443 1.0 443 1.5 +444 1.0 +444 1.5 +446 1.0 +446 1.5 +448 1.0 +448 1.5 449 1.0 449 1.5 452 1.0 452 1.5 +453 1.0 +453 1.5 454 3.0 454 4.5 +455 1.0 +455 1.5 +457 1.0 +457 1.5 458 2.0 458 3.0 -463 2.0 -463 3.0 -467 1.0 -467 1.5 +459 2.0 +459 3.0 +460 1.0 +460 1.5 +462 2.0 +462 3.0 +463 2.0 +463 3.0 +466 3.0 +466 4.5 +467 1.0 +467 1.5 +468 4.0 +468 6.0 469 5.0 469 7.5 47 1.0 @@ -809,12 +1073,24 @@ POSTHOOK: Input: default@src 470 1.5 472 1.0 472 1.5 +475 1.0 +475 1.5 +477 1.0 +477 1.5 478 2.0 478 3.0 +479 1.0 +479 1.5 +480 3.0 +480 4.5 481 1.0 481 1.5 +482 1.0 +482 1.5 483 1.0 483 1.5 +484 1.0 +484 1.5 485 1.0 485 1.5 487 1.0 @@ -823,22 +1099,40 @@ POSTHOOK: Input: default@src 489 6.0 490 1.0 490 1.5 +491 1.0 +491 1.5 492 2.0 492 3.0 +493 1.0 +493 1.5 494 1.0 494 1.5 +495 1.0 +495 1.5 496 1.0 496 1.5 +497 1.0 +497 1.5 498 3.0 498 4.5 5 3.0 5 4.5 +51 2.0 +51 3.0 +53 1.0 +53 1.5 54 1.0 54 1.5 +57 1.0 +57 1.5 58 2.0 58 3.0 +64 1.0 +64 1.5 65 1.0 65 1.5 +66 1.0 +66 1.5 67 2.0 67 3.0 69 1.0 @@ -851,12 +1145,24 @@ POSTHOOK: Input: default@src 74 1.5 76 2.0 76 3.0 +77 1.0 +77 1.5 78 1.0 78 1.5 +8 1.0 +8 1.5 +80 1.0 +80 1.5 +82 1.0 +82 1.5 83 2.0 83 3.0 +84 2.0 +84 3.0 85 1.0 85 1.5 +86 1.0 +86 1.5 87 1.0 87 1.5 9 1.0 @@ -865,313 +1171,11 @@ POSTHOOK: Input: default@src 90 4.5 92 1.0 92 1.5 -96 1.0 -96 1.5 -98 2.0 -98 3.0 -0 3.0 -0 4.5 -103 2.0 -103 3.0 -105 1.0 -105 1.5 -11 1.0 -11 1.5 -114 1.0 -114 1.5 -116 1.0 -116 1.5 -118 2.0 -118 3.0 -125 2.0 -125 3.0 -129 2.0 -129 3.0 -134 2.0 -134 3.0 -136 1.0 -136 1.5 -138 4.0 -138 6.0 -143 1.0 -143 1.5 -145 1.0 -145 1.5 -149 2.0 -149 3.0 -15 2.0 -15 3.0 -150 1.0 -150 1.5 -152 2.0 -152 3.0 -156 1.0 -156 1.5 -158 1.0 -158 1.5 -163 1.0 -163 1.5 -165 2.0 -165 3.0 -167 3.0 -167 4.5 -169 4.0 -169 6.0 -17 1.0 -17 1.5 -170 1.0 -170 1.5 -172 2.0 -172 3.0 -174 2.0 -174 3.0 -176 2.0 -176 3.0 -178 1.0 -178 1.5 -181 1.0 -181 1.5 -183 1.0 -183 1.5 -187 3.0 -187 4.5 -189 1.0 -189 1.5 -19 1.0 -19 1.5 -190 1.0 -190 1.5 -192 1.0 -192 1.5 -194 1.0 -194 1.5 -196 1.0 -196 1.5 -2 1.0 -2 1.5 -20 1.0 -20 1.5 -200 2.0 -200 3.0 -202 1.0 -202 1.5 -208 3.0 -208 4.5 -213 2.0 -213 3.0 -217 2.0 -217 3.0 -219 2.0 -219 3.0 -222 1.0 -222 1.5 -224 2.0 -224 3.0 -226 1.0 -226 1.5 -228 1.0 -228 1.5 -233 2.0 -233 3.0 -235 1.0 -235 1.5 -237 2.0 -237 3.0 -239 2.0 -239 3.0 -24 2.0 -24 3.0 -242 2.0 -242 3.0 -244 1.0 -244 1.5 -248 1.0 -248 1.5 -255 2.0 -255 3.0 -257 1.0 -257 1.5 -26 2.0 -26 3.0 -260 1.0 -260 1.5 -262 1.0 -262 1.5 -266 1.0 -266 1.5 -273 3.0 -273 4.5 -275 1.0 -275 1.5 -277 4.0 -277 6.0 -28 1.0 -28 1.5 -280 2.0 -280 3.0 -282 2.0 -282 3.0 -284 1.0 -284 1.5 -286 1.0 -286 1.5 -288 2.0 -288 3.0 -291 1.0 -291 1.5 -305 1.0 -305 1.5 -307 2.0 -307 3.0 -309 2.0 -309 3.0 -310 1.0 -310 1.5 -316 3.0 -316 4.5 -318 3.0 -318 4.5 -321 2.0 -321 3.0 -323 1.0 -323 1.5 -325 2.0 -325 3.0 -327 3.0 -327 4.5 -33 1.0 -33 1.5 -332 1.0 -332 1.5 -336 1.0 -336 1.5 -338 1.0 -338 1.5 -341 1.0 -341 1.5 -345 1.0 -345 1.5 -35 3.0 -35 4.5 -356 1.0 -356 1.5 -365 1.0 -365 1.5 -367 2.0 -367 3.0 -369 3.0 -369 4.5 -37 2.0 -37 3.0 -374 1.0 -374 1.5 -378 1.0 -378 1.5 -389 1.0 -389 1.5 -392 1.0 -392 1.5 -394 1.0 -394 1.5 -396 3.0 -396 4.5 -4 1.0 -4 1.5 -400 1.0 -400 1.5 -402 1.0 -402 1.5 -404 2.0 -404 3.0 -406 4.0 -406 6.0 -411 1.0 -411 1.5 -413 2.0 -413 3.0 -417 3.0 -417 4.5 -419 1.0 -419 1.5 -42 2.0 -42 3.0 -424 2.0 -424 3.0 -431 3.0 -431 4.5 -435 1.0 -435 1.5 -437 1.0 -437 1.5 -439 2.0 -439 3.0 -44 1.0 -44 1.5 -444 1.0 -444 1.5 -446 1.0 -446 1.5 -448 1.0 -448 1.5 -453 1.0 -453 1.5 -455 1.0 -455 1.5 -457 1.0 -457 1.5 -459 2.0 -459 3.0 -460 1.0 -460 1.5 -462 2.0 -462 3.0 -466 3.0 -466 4.5 -468 4.0 -468 6.0 -475 1.0 -475 1.5 -477 1.0 -477 1.5 -479 1.0 -479 1.5 -480 3.0 -480 4.5 -482 1.0 -482 1.5 -484 1.0 -484 1.5 -491 1.0 -491 1.5 -493 1.0 -493 1.5 -495 1.0 -495 1.5 -497 1.0 -497 1.5 -51 2.0 -51 3.0 -53 1.0 -53 1.5 -57 1.0 -57 1.5 -64 1.0 -64 1.5 -66 1.0 -66 1.5 -77 1.0 -77 1.5 -8 1.0 -8 1.5 -80 1.0 -80 1.5 -82 1.0 -82 1.5 -84 2.0 -84 3.0 -86 1.0 -86 1.5 95 2.0 95 3.0 +96 1.0 +96 1.5 97 2.0 97 3.0 +98 2.0 +98 3.0 diff --git ql/src/test/results/clientpositive/union_remove_22.q.out ql/src/test/results/clientpositive/union_remove_22.q.out index c9235c1..71054ae 100644 --- ql/src/test/results/clientpositive/union_remove_22.q.out +++ ql/src/test/results/clientpositive/union_remove_22.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, some columns are repeated. So, union cannot be removed. -- It does not matter, whether the output is merged or not. In this case, merging is turned @@ -12,7 +14,9 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 map-reduce subqueries is performed followed by select and a file sink -- However, some columns are repeated. So, union cannot be removed. -- It does not matter, whether the output is merged or not. In this case, merging is turned @@ -229,14 +233,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 1 -2 1 1 -3 1 1 -7 1 1 -8 2 2 1 1 1 2 1 1 +2 1 1 +3 1 1 3 1 1 7 1 1 +7 1 1 +8 2 2 8 2 2 PREHOOK: query: explain insert overwrite table outputTbl1 diff --git ql/src/test/results/clientpositive/union_remove_6_subq.q.out ql/src/test/results/clientpositive/union_remove_6_subq.q.out index 1b93994..46e0021 100644 --- ql/src/test/results/clientpositive/union_remove_6_subq.q.out +++ ql/src/test/results/clientpositive/union_remove_6_subq.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: -- This is to test the union->selectstar->filesink optimization +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert. @@ -9,7 +11,9 @@ create table inputTbl1(key string, val string) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@inputTbl1 -POSTHOOK: query: -- This is to test the union->selectstar->filesink optimization +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- This is to test the union->selectstar->filesink optimization -- Union of 2 subqueries is performed (all of which are mapred queries) -- followed by select star and a file sink in 2 output tables. -- The optimiaztion does not take affect since it is a multi-table insert.