diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java index 8551a5b..24610d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MetadataOnlyOptimizer.java @@ -133,8 +133,7 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, WalkerCtx walkerCtx = (WalkerCtx) procCtx; // There can be atmost one element eligible to be converted to // metadata only - if ((walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) - || (walkerCtx.getMayBeMetadataOnlyTableScans().size() > 1)) { + if (walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) { return nd; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java index 957c327..79c7b8b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java @@ -21,9 +21,11 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Collection; -import java.util.Iterator; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.Stack; import org.apache.commons.logging.Log; @@ -101,30 +103,46 @@ private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc) { return paths; } - private void processAlias(MapWork work, String alias) { - List paths = getPathsForAlias(work, alias); - if (paths.isEmpty()) { - // partitioned table which don't select any partitions - // there are no paths to replace with fakePath - return; - } + private void processAlias(MapWork work, ArrayList aliases, String path) { + work.setUseOneNullRowInputFormat(true); + for (String alias : aliases) { + // Change the conf for tableScanOp + TableScanOperator tso = (TableScanOperator) work.getAliasToWork().get(alias); + tso.getConf().setIsMetadataOnly(true); + // Change the alias partition desc + PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); + changePartitionToMetadataOnly(aliasPartn); + } + + PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); + PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc); + Path fakePath = new Path(physicalContext.getContext().getMRTmpPath() + + newPartition.getTableName() + encode(newPartition.getPartSpec())); + work.getPathToPartitionInfo().remove(path); + work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); + assert(work.getPathToAliases().remove(path).equals(aliases)); + work.getPathToAliases().put(fakePath.getName(), aliases); + } - // Change the alias partition desc - PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); - changePartitionToMetadataOnly(aliasPartn); - - - for (String path : paths) { - PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); - PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc); - Path fakePath = new Path(physicalContext.getContext().getMRTmpPath() - + newPartition.getTableName() - + encode(newPartition.getPartSpec())); - work.getPathToPartitionInfo().remove(path); - work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); - ArrayList aliases = work.getPathToAliases().remove(path); - work.getPathToAliases().put(fakePath.getName(), aliases); + private void processAlias(MapWork work, HashSet tableScans) { + ArrayList aliasList = new ArrayList(); + for (TableScanOperator tso : tableScans) { + // use LinkedHashMap> + // getAliasToWork() + String alias = getAliasForTableScanOperator(work, tso); + aliasList.add(alias); + } + // group path alias according to work + LinkedHashMap> candidates = new LinkedHashMap>(); + for (String path : work.getPaths()) { + ArrayList aliases = work.getPathToAliases().get(path); + if (aliases != null && aliasList.containsAll(aliases)) { + candidates.put(path, aliases); + } + } + for (Entry> entry : candidates.entrySet()) { + processAlias(work, entry.getValue(), entry.getKey()); } } @@ -177,16 +195,8 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) LOG.info(String.format("Found %d null table scans", walkerCtx.getMetadataOnlyTableScans().size())); - Iterator iterator - = walkerCtx.getMetadataOnlyTableScans().iterator(); - - while (iterator.hasNext()) { - TableScanOperator tso = iterator.next(); - tso.getConf().setIsMetadataOnly(true); - String alias = getAliasForTableScanOperator(mapWork, tso); - LOG.info("Null table scan for " + alias); - processAlias(mapWork, alias); - } + if (walkerCtx.getMetadataOnlyTableScans().size() > 0) + processAlias(mapWork, walkerCtx.getMetadataOnlyTableScans()); } return null; } diff --git a/ql/src/test/queries/clientpositive/metadataOnlyOptimizer.q b/ql/src/test/queries/clientpositive/metadataOnlyOptimizer.q new file mode 100644 index 0000000..a26ef1a --- /dev/null +++ b/ql/src/test/queries/clientpositive/metadataOnlyOptimizer.q @@ -0,0 +1,44 @@ +select key from( +select '1' as key from srcpart where ds="2008-04-09" +UNION all +SELECT key from srcpart where ds="2008-04-09" and hr="11" +) tab group by key; + +select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key; + +select max(key) from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key; + +select key from( +SELECT '1' as key from src +UNION all +SELECT '2' as key from src +) tab group by key; + + +select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +UNION all +SELECT '2' as key from src +UNION all +SELECT key as key from src +) tab group by key; + +select k from (SELECT '1' as k from src limit 0 union all select key as k from src limit 1)tab; + +select k from (SELECT '1' as k from src limit 1 union all select key as k from src limit 0)tab; + +select max(ds) from srcpart; + +select count(ds) from srcpart; + + diff --git a/ql/src/test/results/clientpositive/metadataOnlyOptimizer.q.out b/ql/src/test/results/clientpositive/metadataOnlyOptimizer.q.out new file mode 100644 index 0000000..1fcbc0a --- /dev/null +++ b/ql/src/test/results/clientpositive/metadataOnlyOptimizer.q.out @@ -0,0 +1,1386 @@ +PREHOOK: query: select key from( +select '1' as key from srcpart where ds="2008-04-09" +UNION all +SELECT key from srcpart where ds="2008-04-09" and hr="11" +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key from( +select '1' as key from srcpart where ds="2008-04-09" +UNION all +SELECT key from srcpart where ds="2008-04-09" and hr="11" +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 +216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 +34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 +369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 +216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 +34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 +369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select max(key) from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select max(key) from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 +216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 +34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 +369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT '2' as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT '2' as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +2 +PREHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +UNION all +SELECT '2' as key from src +UNION all +SELECT key as key from src +) tab group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key from( +SELECT '1' as key from src +UNION all +SELECT key as key from src +UNION all +SELECT '2' as key from src +UNION all +SELECT key as key from src +) tab group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +1 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +12 +120 +125 +126 +128 +129 +131 +133 +134 +136 +137 +138 +143 +145 +146 +149 +15 +150 +152 +153 +155 +156 +157 +158 +160 +162 +163 +164 +165 +166 +167 +168 +169 +17 +170 +172 +174 +175 +176 +177 +178 +179 +18 +180 +181 +183 +186 +187 +189 +19 +190 +191 +192 +193 +194 +195 +196 +197 +199 +2 +20 +200 +201 +202 +203 +205 +207 +208 +209 +213 +214 +216 +217 +218 +219 +221 +222 +223 +224 +226 +228 +229 +230 +233 +235 +237 +238 +239 +24 +241 +242 +244 +247 +248 +249 +252 +255 +256 +257 +258 +26 +260 +262 +263 +265 +266 +27 +272 +273 +274 +275 +277 +278 +28 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +291 +292 +296 +298 +30 +302 +305 +306 +307 +308 +309 +310 +311 +315 +316 +317 +318 +321 +322 +323 +325 +327 +33 +331 +332 +333 +335 +336 +338 +339 +34 +341 +342 +344 +345 +348 +35 +351 +353 +356 +360 +362 +364 +365 +366 +367 +368 +369 +37 +373 +374 +375 +377 +378 +379 +382 +384 +386 +389 +392 +393 +394 +395 +396 +397 +399 +4 +400 +401 +402 +403 +404 +406 +407 +409 +41 +411 +413 +414 +417 +418 +419 +42 +421 +424 +427 +429 +43 +430 +431 +432 +435 +436 +437 +438 +439 +44 +443 +444 +446 +448 +449 +452 +453 +454 +455 +457 +458 +459 +460 +462 +463 +466 +467 +468 +469 +47 +470 +472 +475 +477 +478 +479 +480 +481 +482 +483 +484 +485 +487 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +5 +51 +53 +54 +57 +58 +64 +65 +66 +67 +69 +70 +72 +74 +76 +77 +78 +8 +80 +82 +83 +84 +85 +86 +87 +9 +90 +92 +95 +96 +97 +98 +PREHOOK: query: select k from (SELECT '1' as k from src limit 0 union all select key as k from src limit 1)tab +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select k from (SELECT '1' as k from src limit 0 union all select key as k from src limit 1)tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +238 +PREHOOK: query: select k from (SELECT '1' as k from src limit 1 union all select key as k from src limit 0)tab +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select k from (SELECT '1' as k from src limit 1 union all select key as k from src limit 0)tab +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +1 +PREHOOK: query: select max(ds) from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select max(ds) from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2008-04-09 +PREHOOK: query: select count(ds) from srcpart +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select count(ds) from srcpart +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +#### A masked pattern was here #### +2000 diff --git a/ql/src/test/results/clientpositive/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/optimize_nullscan.q.out index 02636ba..f968a24 100644 --- a/ql/src/test/results/clientpositive/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/optimize_nullscan.q.out @@ -2091,9 +2091,9 @@ STAGE PLANS: value expressions: key (type: string) auto parallelism: false Path -> Alias: - -mr-10003default.src{} [s1, s2] + -mr-10002default.src{} [s1, s2] Path -> Partition: - -mr-10003default.src{} + -mr-10002default.src{} Partition base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat @@ -2138,7 +2138,7 @@ STAGE PLANS: name: default.src name: default.src Truncated Path -> Alias: - -mr-10003default.src{} [s1, s2] + -mr-10002default.src{} [s1, s2] Needs Tagging: true Reduce Operator Tree: Join Operator