diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java index 8551a5b..24610d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java @@ -133,8 +133,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, WalkerCtx walkerCtx = (WalkerCtx) procCtx; // There can be atmost one element eligible to be converted to // metadata only - if ((walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) - || (walkerCtx.getMayBeMetadataOnlyTableScans().size() > 1)) { + if (walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) { return nd; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java index 957c327..165b3ce 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java @@ -21,6 +21,8 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -128,6 +130,43 @@ private void processAlias(MapWork work, String alias) { } } + private boolean equalAliasList(List one, List two) { + if (one == null && two == null) { + return true; + } + if ((one == null && two != null) || one != null && two == null || one.size() != two.size()) { + return false; + } + one = new ArrayList(one); + two = new ArrayList(two); + Collections.sort(one); + Collections.sort(two); + return one.equals(two); + } + + private void processAlias(MapWork work, HashSet tableScans) { + List aliasList = new ArrayList(); + for 
(TableScanOperator tso : tableScans) { + // collect the alias of every candidate table scan; + // getAliasForTableScanOperator resolves it from the MapWork + String alias = getAliasForTableScanOperator(work, tso); + aliasList.add(alias); + } + for (String alias : aliasList) { + List paths = getPathsForAlias(work, alias); + for (String path : paths) { + // all aliases that read this path, per getPathToAliases() + ArrayList aliases = work.getPathToAliases().get(path); + // null-scan only if this path's aliases are exactly the candidate aliases + if (equalAliasList(aliasList, aliases)) { + TableScanOperator tso = (TableScanOperator) work.getAliasToWork().get(alias); + tso.getConf().setIsMetadataOnly(true); + processAlias(work, alias); + } + } + } + } + // considered using URLEncoder, but it seemed too much private String encode(Map partSpec) { return partSpec.toString().replaceAll("[:/#\\?]", "_"); @@ -177,16 +216,7 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) LOG.info(String.format("Found %d null table scans", walkerCtx.getMetadataOnlyTableScans().size())); - Iterator iterator - = walkerCtx.getMetadataOnlyTableScans().iterator(); - - while (iterator.hasNext()) { - TableScanOperator tso = iterator.next(); - tso.getConf().setIsMetadataOnly(true); - String alias = getAliasForTableScanOperator(mapWork, tso); - LOG.info("Null table scan for " + alias); - processAlias(mapWork, alias); - } + processAlias(mapWork, walkerCtx.getMetadataOnlyTableScans()); } return null; } diff --git a/ql/src/test/queries/clientpositive/metadataOnlyOptimizer.q b/ql/src/test/queries/clientpositive/metadataOnlyOptimizer.q new file mode 100644 index 0000000..a73f00b --- /dev/null +++ b/ql/src/test/queries/clientpositive/metadataOnlyOptimizer.q @@ -0,0 +1,38 @@ +select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key; + +select max(key) from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key; + +select key from( +SELECT '1' as key from src +UNION all +SELECT '2' as key from src +) tab 
group by key; + + +select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +UNION all +SELECT '2' as key from src +UNION all +SELECT key as key from src +) tab group by key; + +select k from (SELECT '1' as k from src limit 0 union all select key as k from src limit 1)tab; + +select k from (SELECT '1' as k from src limit 1 union all select key as k from src limit 0)tab; + +select max(ds) from srcpart; + +select count(ds) from srcpart; + + diff --git a/ql/src/test/results/clientpositive/metadataOnlyOptimizer.q.out b/ql/src/test/results/clientpositive/metadataOnlyOptimizer.q.out new file mode 100644 index 0000000..0de865e --- /dev/null +++ b/ql/src/test/results/clientpositive/metadataOnlyOptimizer.q.out @@ -0,0 +1,1056 @@ +PREHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 +216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 
+34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 +369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select max(key) from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 +216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 +34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 
+369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT '2' as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT '2' as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +2 +PREHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +UNION all +SELECT '2' as key from src +UNION all +SELECT key as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +UNION all +SELECT '2' as key from src +UNION all +SELECT key as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 
+216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 +34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 +369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select k from (SELECT '1' as k from src limit 0 union all select key as k from src limit 1)tab +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select k from (SELECT '1' as k from src limit 0 union all select key as k from src limit 1)tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: select k from (SELECT '1' as k from src limit 1 union all select key as k from src limit 0)tab +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select k from (SELECT '1' as k from src limit 1 union all select key as k from src limit 0)tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +PREHOOK: query: select max(ds) from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: 
Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select max(ds) from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-09 +PREHOOK: query: select count(ds) from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(ds) from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2000