diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java
index bc8afbf..d179a02 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanOptimizer.java
@@ -18,20 +18,19 @@
 
 package org.apache.hadoop.hive.ql.optimizer.physical;
 
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.Queue;
 import java.util.Stack;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.hadoop.hive.ql.exec.FilterOperator;
 import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -45,6 +44,8 @@
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * This optimizer attempts following two optimizations:
@@ -54,7 +55,9 @@
  */
 public class NullScanOptimizer implements PhysicalPlanResolver {
 
-  private static final Logger LOG = LoggerFactory.getLogger(NullScanOptimizer.class.getName());
+  private static final Logger LOG =
+      LoggerFactory.getLogger(NullScanOptimizer.class.getName());
+
 
   @Override
   public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
@@ -85,22 +88,23 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
   static private boolean isNullOpPresentInAllBranches(TableScanOperator ts, Node causeOfNullNode) {
     Node curNode = null;
     List<? extends Node> curChd = null;
-    LinkedList<Node> middleNodes = new LinkedList<Node>();
-    middleNodes.addLast(ts);
+    Queue<Node> middleNodes = new ArrayDeque<Node>();
+    middleNodes.add(ts);
 
     while (!middleNodes.isEmpty()) {
       curNode = middleNodes.remove();
       curChd = curNode.getChildren();
       for (Node chd: curChd) {
-        if (chd.getChildren() == null || chd.getChildren().isEmpty() || chd == causeOfNullNode) {
-          if (chd != causeOfNullNode) { // If there is an end node that not the limit0/wherefalse..
+        List<? extends Node> children = chd.getChildren();
+        if (CollectionUtils.isEmpty(children) || chd == causeOfNullNode) {
+          // If there is an end node that not the limit0/wherefalse..
+          if (chd != causeOfNullNode) {
             return false;
           }
         } else {
-          middleNodes.addLast(chd);
+          middleNodes.add(chd);
         }
       }
-
     }
     return true;
   }
@@ -126,7 +130,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
       if (op instanceof TableScanOperator) {
         if (isNullOpPresentInAllBranches((TableScanOperator)op, filter)) {
           ctx.setMayBeMetadataOnly((TableScanOperator)op);
-          LOG.info("Found where false TableScan. " + op);
+          LOG.debug("Found where false TableScan. {}", op);
         }
       }
     }
@@ -142,7 +146,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
         Object... nodeOutputs) throws SemanticException {
 
       LimitOperator limitOp = (LimitOperator)nd;
-      if(!(limitOp.getConf().getLimit() == 0)) {
+      if (!(limitOp.getConf().getLimit() == 0)) {
         return null;
       }
 
@@ -153,7 +157,7 @@ public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
           tsOp.remove();
         }
       }
-      LOG.info("Found Limit 0 TableScan. " + nd);
+      LOG.debug("Found Limit 0 TableScan. {}", nd);
       ((WalkerCtx)procCtx).convertMetadataOnly();
       return null;
     }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
index 6c0e71d..6d7efa9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/NullScanTaskDispatcher.java
@@ -35,10 +35,14 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.Stack;
+import java.util.stream.Collectors;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
+import org.apache.commons.collections.CollectionUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.Operator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -63,17 +67,19 @@
 import org.apache.hadoop.hive.serde2.NullStructSerDe;
 
 /**
- * Iterate over all tasks one by one and removes all input paths from task if conditions as
- * defined in rules match.
+ * Iterate over all tasks one by one and removes all input paths from task if
+ * conditions as defined in rules match.
  */
 public class NullScanTaskDispatcher implements Dispatcher {
 
-  static final Logger LOG = LoggerFactory.getLogger(NullScanTaskDispatcher.class.getName());
+  static final Logger LOG =
+      LoggerFactory.getLogger(NullScanTaskDispatcher.class.getName());
 
   private final PhysicalContext physicalContext;
   private final Map<Rule, NodeProcessor> rules;
 
-  public NullScanTaskDispatcher(PhysicalContext context, Map<Rule, NodeProcessor> rules) {
+  public NullScanTaskDispatcher(PhysicalContext context,
+      Map<Rule, NodeProcessor> rules) {
     super();
     physicalContext = context;
     this.rules = rules;
@@ -81,19 +87,20 @@ public NullScanTaskDispatcher(PhysicalContext context, Map<Rule, NodeProcessor>
 
   private String getAliasForTableScanOperator(MapWork work,
       TableScanOperator tso) {
-
-    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry :
-        work.getAliasToWork().entrySet()) {
+    for (Map.Entry<String, Operator<? extends OperatorDesc>> entry : work
+        .getAliasToWork().entrySet()) {
       if (entry.getValue() == tso) {
         return entry.getKey();
       }
     }
-
     return null;
   }
 
-  private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc, Path path) {
-    if (desc == null) return null;
+  private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc,
+      Path path) {
+    if (desc == null) {
+      return null;
+    }
     boolean isEmpty = false;
     try {
       isEmpty = Utilities.isEmptyPath(physicalContext.getConf(), path);
@@ -104,25 +111,23 @@ private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc, Path pat
         isEmpty ? ZeroRowsInputFormat.class : OneNullRowInputFormat.class);
     desc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class);
     desc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB,
-      NullStructSerDe.class.getName());
+        NullStructSerDe.class.getName());
     return desc;
   }
 
-  private void processAlias(MapWork work, Path path, ArrayList<String> aliasesAffected,
-      ArrayList<String> aliases) {
+  private void processAlias(MapWork work, Path path,
+      Collection<String> aliasesAffected, Set<String> aliases) {
     // the aliases that are allowed to map to a null scan.
-    ArrayList<String> allowed = new ArrayList<String>();
-    for (String alias : aliasesAffected) {
-      if (aliases.contains(alias)) {
-        allowed.add(alias);
-      }
-    }
-    if (allowed.size() > 0) {
+    Collection<String> allowed = aliasesAffected.stream()
+        .filter(a -> aliases.contains(a)).collect(Collectors.toList());
+    if (!allowed.isEmpty()) {
       PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
-      PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc, path);
+      PartitionDesc newPartition =
+          changePartitionToMetadataOnly(partDesc, path);
       // Prefix partition with something to avoid it being a hidden file.
-      Path fakePath = new Path(NullScanFileSystem.getBase() + newPartition.getTableName()
-          + "/part" + encode(newPartition.getPartSpec()));
+      Path fakePath =
+          new Path(NullScanFileSystem.getBase() + newPartition.getTableName()
+              + "/part" + encode(newPartition.getPartSpec()));
       StringInternUtils.internUriStringsInPath(fakePath);
       work.addPathToPartitionInfo(fakePath, newPartition);
       work.addPathToAlias(fakePath, new ArrayList<>(allowed));
@@ -134,12 +139,12 @@ private void processAlias(MapWork work, Path path, ArrayList<String> aliasesAffe
     }
   }
 
-  private void processAlias(MapWork work, HashSet<TableScanOperator> tableScans) {
-    ArrayList<String> aliases = new ArrayList<String>();
+  private void processAlias(MapWork work,
+      HashSet<TableScanOperator> tableScans) {
+    Set<String> aliases = new HashSet<>();
     for (TableScanOperator tso : tableScans) {
       // use LinkedHashMap<String, Operator<? extends OperatorDesc>>
-      // getAliasToWork()
-      // should not apply this for non-native table
+      // getAliasToWork() should not apply this for non-native table
       if (tso.getConf().getTableMetadata().getStorageHandler() != null) {
         continue;
       }
@@ -151,7 +156,7 @@ private void processAlias(MapWork work, HashSet<TableScanOperator> tableScans) {
     LinkedHashMap<Path, ArrayList<String>> candidates = new LinkedHashMap<>();
     for (Path path : work.getPaths()) {
       ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
-      if (aliasesAffected != null && aliasesAffected.size() > 0) {
+      if (CollectionUtils.isNotEmpty(aliasesAffected)) {
         candidates.put(path, aliasesAffected);
       }
     }
@@ -183,10 +188,10 @@ public int compare(MapWork o1, MapWork o2) {
     });
 
     for (MapWork mapWork : mapWorks) {
-      LOG.debug("Looking at: "+mapWork.getName());
-      Collection<Operator<? extends OperatorDesc>> topOperators
-        = mapWork.getAliasToWork().values();
-      if (topOperators.size() == 0) {
+      LOG.debug("Looking at: {}", mapWork.getName());
+      Collection<Operator<? extends OperatorDesc>> topOperators =
+          mapWork.getAliasToWork().values();
+      if (topOperators.isEmpty()) {
         LOG.debug("No top operators");
         return null;
       }
@@ -201,9 +206,9 @@ public int compare(MapWork o1, MapWork o2) {
       // Create a list of topOp nodes
       ArrayList<Node> topNodes = new ArrayList<Node>();
       // Get the top Nodes for this task
-      for (Operator<? extends OperatorDesc>
-          workOperator : topOperators) {
-        if (parseContext.getTopOps().values().contains(workOperator)) {
+      Collection<TableScanOperator> topOps = parseContext.getTopOps().values();
+      for (Operator<? extends OperatorDesc> workOperator : topOperators) {
+        if (topOps.contains(workOperator)) {
           topNodes.add(workOperator);
         }
       }
@@ -215,10 +220,11 @@ public int compare(MapWork o1, MapWork o2) {
 
       ogw.startWalking(topNodes, null);
 
-      LOG.debug(String.format("Found %d null table scans",
-          walkerCtx.getMetadataOnlyTableScans().size()));
-      if (walkerCtx.getMetadataOnlyTableScans().size() > 0)
+      int scanTableSize = walkerCtx.getMetadataOnlyTableScans().size();
+      LOG.debug("Found {} null table scans", scanTableSize);
+      if (scanTableSize > 0) {
         processAlias(mapWork, walkerCtx.getMetadataOnlyTableScans());
+      }
     }
     return null;
   }
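Illustrative aside, not part of the patch: a minimal, self-contained sketch of the breadth-first check that the reworked isNullOpPresentInAllBranches performs, using a Queue backed by java.util.ArrayDeque and commons-collections CollectionUtils.isEmpty, as in the change above. SimpleNode, BranchCheckSketch, and endsOnlyIn are hypothetical stand-ins for illustration only (SimpleNode plays the role of org.apache.hadoop.hive.ql.lib.Node); they are not Hive classes.

import java.util.ArrayDeque;
import java.util.List;
import java.util.Queue;

import org.apache.commons.collections.CollectionUtils;

// Stand-in for org.apache.hadoop.hive.ql.lib.Node; illustration only.
interface SimpleNode {
  List<? extends SimpleNode> getChildren();
}

public class BranchCheckSketch {
  // Returns true only if every leaf reachable from start is causeOfNull:
  // the same breadth-first walk the patched method performs with a Queue.
  static boolean endsOnlyIn(SimpleNode start, SimpleNode causeOfNull) {
    Queue<SimpleNode> middleNodes = new ArrayDeque<>();
    middleNodes.add(start);
    while (!middleNodes.isEmpty()) {
      SimpleNode curNode = middleNodes.remove();
      for (SimpleNode chd : curNode.getChildren()) {
        List<? extends SimpleNode> children = chd.getChildren();
        if (CollectionUtils.isEmpty(children) || chd == causeOfNull) {
          if (chd != causeOfNull) {
            return false; // a branch ends somewhere other than the cause node
          }
        } else {
          middleNodes.add(chd); // interior node: keep walking this branch
        }
      }
    }
    return true;
  }
}

Switching from LinkedList to ArrayDeque only swaps the queue implementation; the traversal order and the result of the check are unchanged.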