diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java index 79c7b8b..598e7d1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java @@ -102,47 +102,51 @@ private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc) { return paths; } - - private void processAlias(MapWork work, ArrayList aliases, String path) { - - work.setUseOneNullRowInputFormat(true); - for (String alias : aliases) { - // Change the conf for tableScanOp - TableScanOperator tso = (TableScanOperator) work.getAliasToWork().get(alias); - tso.getConf().setIsMetadataOnly(true); - // Change the alias partition desc - PartitionDesc aliasPartn = work.getAliasToPartnInfo().get(alias); - changePartitionToMetadataOnly(aliasPartn); + + private void processAlias(MapWork work, String path, ArrayList aliasesAffected, + ArrayList aliases) { + // the aliases that are allowed to map to a null scan. + ArrayList allowed = new ArrayList(); + for (String alias : aliasesAffected) { + if (aliases.contains(alias)) { + allowed.add(alias); + } + } + if (allowed.size() > 0) { + work.setUseOneNullRowInputFormat(true); + PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone(); + PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc); + Path fakePath = new Path(physicalContext.getContext().getMRTmpPath() + + newPartition.getTableName() + encode(newPartition.getPartSpec())); + work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); + work.getPathToAliases().put(fakePath.getName(), new ArrayList(allowed)); + aliasesAffected.removeAll(allowed); + if (aliasesAffected.isEmpty()) { + work.getPathToAliases().remove(path); + work.getPathToPartitionInfo().remove(path); + } } - - PartitionDesc partDesc = work.getPathToPartitionInfo().get(path); - PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc); - Path fakePath = new Path(physicalContext.getContext().getMRTmpPath() - + newPartition.getTableName() + encode(newPartition.getPartSpec())); - work.getPathToPartitionInfo().remove(path); - work.getPathToPartitionInfo().put(fakePath.getName(), newPartition); - assert(work.getPathToAliases().remove(path).equals(aliases)); - work.getPathToAliases().put(fakePath.getName(), aliases); } private void processAlias(MapWork work, HashSet tableScans) { - ArrayList aliasList = new ArrayList(); + ArrayList aliases = new ArrayList(); for (TableScanOperator tso : tableScans) { // use LinkedHashMap> // getAliasToWork() String alias = getAliasForTableScanOperator(work, tso); - aliasList.add(alias); + aliases.add(alias); + tso.getConf().setIsMetadataOnly(true); } // group path alias according to work LinkedHashMap> candidates = new LinkedHashMap>(); for (String path : work.getPaths()) { - ArrayList aliases = work.getPathToAliases().get(path); - if (aliases != null && aliasList.containsAll(aliases)) { - candidates.put(path, aliases); + ArrayList aliasesAffected = work.getPathToAliases().get(path); + if (aliasesAffected != null && aliasesAffected.size() > 0) { + candidates.put(path, aliasesAffected); } } for (Entry> entry : candidates.entrySet()) { - processAlias(work, entry.getValue(), entry.getKey()); + processAlias(work, entry.getKey(), entry.getValue(), aliases); } } diff --git a/ql/src/test/results/clientpositive/metadataonly1.q.out b/ql/src/test/results/clientpositive/metadataonly1.q.out index 24cd9fb..ce40987 100644 --- a/ql/src/test/results/clientpositive/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/metadataonly1.q.out @@ -146,7 +146,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -292,7 +291,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -621,7 +619,6 @@ STAGE PLANS: Path -> Partition: -mr-10004default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -661,7 +658,6 @@ STAGE PLANS: name: default.test1 -mr-10005default.test1{ds=2} Partition - base file name: ds=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1075,7 +1071,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1116,7 +1111,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1157,7 +1151,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1554,7 +1547,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1594,7 +1586,6 @@ STAGE PLANS: name: default.test1 -mr-10003default.test1{ds=2} Partition - base file name: ds=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1805,7 +1796,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=01_10_10, hr=01} Partition - base file name: hr=01 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1846,7 +1836,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=01_10_20, hr=02} Partition - base file name: hr=02 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1887,7 +1876,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1928,7 +1916,6 @@ STAGE PLANS: name: default.test2 -mr-10005default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1969,7 +1956,6 @@ STAGE PLANS: name: default.test2 -mr-10006default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: diff --git a/ql/src/test/results/clientpositive/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/optimize_nullscan.q.out index f968a24..5974439 100644 --- a/ql/src/test/results/clientpositive/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/optimize_nullscan.q.out @@ -73,7 +73,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -316,7 +315,6 @@ STAGE PLANS: Path -> Partition: -mr-10004default.srcpart{ds=2008-04-08, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -362,7 +360,6 @@ STAGE PLANS: name: default.srcpart -mr-10005default.srcpart{ds=2008-04-08, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -408,7 +405,6 @@ STAGE PLANS: name: default.srcpart -mr-10006default.srcpart{ds=2008-04-09, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -454,7 +450,6 @@ STAGE PLANS: name: default.srcpart -mr-10007default.srcpart{ds=2008-04-09, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -567,7 +562,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -780,7 +774,6 @@ STAGE PLANS: Path -> Partition: -mr-10004default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1310,7 +1303,6 @@ STAGE PLANS: Path -> Partition: -mr-10004default.srcpart{ds=2008-04-08, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1356,7 +1348,6 @@ STAGE PLANS: name: default.srcpart -mr-10005default.srcpart{ds=2008-04-08, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1402,7 +1393,6 @@ STAGE PLANS: name: default.srcpart -mr-10006default.srcpart{ds=2008-04-09, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1448,7 +1438,6 @@ STAGE PLANS: name: default.srcpart -mr-10007default.srcpart{ds=2008-04-09, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1559,7 +1548,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1779,7 +1767,6 @@ STAGE PLANS: Path -> Partition: -mr-10004default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1928,7 +1915,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -2095,7 +2081,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: diff --git a/ql/src/test/results/clientpositive/tez/metadataonly1.q.out b/ql/src/test/results/clientpositive/tez/metadataonly1.q.out index b8f1e11..909873a 100644 --- a/ql/src/test/results/clientpositive/tez/metadataonly1.q.out +++ b/ql/src/test/results/clientpositive/tez/metadataonly1.q.out @@ -157,7 +157,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -309,7 +308,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1037,7 +1035,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1078,7 +1075,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1119,7 +1115,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1528,7 +1523,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test1{ds=1} Partition - base file name: ds=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1568,7 +1562,6 @@ STAGE PLANS: name: default.test1 -mr-10003default.test1{ds=2} Partition - base file name: ds=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1785,7 +1778,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.test2{ds=01_10_10, hr=01} Partition - base file name: hr=01 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1826,7 +1818,6 @@ STAGE PLANS: name: default.test2 -mr-10003default.test2{ds=01_10_20, hr=02} Partition - base file name: hr=02 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1867,7 +1858,6 @@ STAGE PLANS: name: default.test2 -mr-10004default.test2{ds=1, hr=1} Partition - base file name: hr=1 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1908,7 +1898,6 @@ STAGE PLANS: name: default.test2 -mr-10005default.test2{ds=1, hr=2} Partition - base file name: hr=2 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1949,7 +1938,6 @@ STAGE PLANS: name: default.test2 -mr-10006default.test2{ds=1, hr=3} Partition - base file name: hr=3 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: diff --git a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out index 2d07968..a8fe550 100644 --- a/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/tez/optimize_nullscan.q.out @@ -248,7 +248,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -319,7 +318,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.srcpart{ds=2008-04-08, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -365,7 +363,6 @@ STAGE PLANS: name: default.srcpart -mr-10004default.srcpart{ds=2008-04-08, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -411,7 +408,6 @@ STAGE PLANS: name: default.srcpart -mr-10005default.srcpart{ds=2008-04-09, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -457,7 +453,6 @@ STAGE PLANS: name: default.srcpart -mr-10006default.srcpart{ds=2008-04-09, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -677,7 +672,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1115,7 +1109,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1186,7 +1179,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.srcpart{ds=2008-04-08, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1232,7 +1224,6 @@ STAGE PLANS: name: default.srcpart -mr-10004default.srcpart{ds=2008-04-08, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1278,7 +1269,6 @@ STAGE PLANS: name: default.srcpart -mr-10005default.srcpart{ds=2008-04-09, hr=11} Partition - base file name: hr=11 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1324,7 +1314,6 @@ STAGE PLANS: name: default.srcpart -mr-10006default.srcpart{ds=2008-04-09, hr=12} Partition - base file name: hr=12 input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: @@ -1634,7 +1623,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1806,7 +1794,6 @@ STAGE PLANS: Path -> Partition: -mr-10002default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: @@ -1873,7 +1860,6 @@ STAGE PLANS: Path -> Partition: -mr-10003default.src{} Partition - base file name: src input format: org.apache.hadoop.hive.ql.io.OneNullRowInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: