diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java index 41607aeec5..224e0e1f17 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java @@ -22,8 +22,11 @@ import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.SemanticException; +import java.util.*; + public class ExpressionWalker extends DefaultGraphWalker { + protected final IdentityHashMap exprSeenMap = new IdentityHashMap(); /** * Constructor. * @@ -64,33 +67,38 @@ private boolean shouldByPass(Node childNode, Node parentNode) { protected void walk(Node nd) throws SemanticException { // Push the node in the stack opStack.push(nd); + Node prevVisitedNd = null; // While there are still nodes to dispatch... while (!opStack.empty()) { Node node = opStack.peek(); + boolean dispatchNode = false; - if (node.getChildren() == null || - getDispatchedList().containsAll(node.getChildren())) { - // Dispatch current node - if (!getDispatchedList().contains(node)) { - dispatch(node, opStack); - opQueue.add(node); - } - opStack.pop(); - continue; - } - - // Add a single child and restart the loop - for (Node childNode : node.getChildren()) { - if (!getDispatchedList().contains(childNode)) { + if(node.getChildren() == null) { + dispatchNode = true; + } else if((prevVisitedNd != null && prevVisitedNd == node.getChildren().get(0)) + || exprSeenMap.containsKey(node.getChildren().get(0))) { + // since subquery expression nodes are bypassed, we can not rely only on checking previous + // visited node. 
+ // all children of node has been processed + dispatchNode = true; + } else { + for (Node childNode : node.getChildren()) { if(shouldByPass(childNode, node)) { retMap.put(childNode, null); + exprSeenMap.put(childNode, null); } else { opStack.push(childNode); } - break; } } + + if(dispatchNode) { + dispatch(node, opStack); + opQueue.add(node); + opStack.pop(); + } + prevVisitedNd = node; } // end while } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/PostOrderWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/PostOrderWalker.java new file mode 100644 index 0000000000..88434f5b77 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/PostOrderWalker.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.lib; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.parse.SemanticException; + +/** + * Walker to traverse an expression tree in reverse post order + * e.g. 
(Right child, Left child, Parent) + * This is an iterative implementation of post order traversal used for iterating + * over an expression Tree. This should not be used to iterate over DAG/Graphs since + * it assumes that each node has only one incoming edge. + */ +public class PostOrderWalker implements GraphWalker { + + /** + * opStack keeps the nodes that have been visited, but have not been + * dispatched yet + */ + protected final Stack opStack; + /** + * opQueue keeps the nodes in the order that they were dispatched. + * Then it is used to go through the processed nodes and store + * the results that the dispatcher has produced (if any) + */ + protected final Queue opQueue; + /** + * toWalk stores the starting nodes for the graph that needs to be + * traversed + */ + protected final List toWalk = new ArrayList(); + protected final IdentityHashMap retMap = new IdentityHashMap(); + protected final Dispatcher dispatcher; + + /** + * Constructor. + * + * @param disp + * dispatcher to call for each op encountered + */ + public PostOrderWalker(Dispatcher disp) { + dispatcher = disp; + opStack = new Stack(); + opQueue = new LinkedList(); + } + + /** + * @return the doneList + */ + protected Set getDispatchedList() { + return retMap.keySet(); + } + + /** + * Dispatch the current operator. 
+ * + * @param nd + * node being walked + * @param ndStack + * stack of nodes encountered + * @throws SemanticException + */ + public void dispatch(Node nd, Stack ndStack) throws SemanticException { + dispatchAndReturn(nd, ndStack); + } + + /** + * Returns dispatch result + */ + public T dispatchAndReturn(Node nd, Stack ndStack) throws SemanticException { + Object[] nodeOutputs = null; + if (nd.getChildren() != null) { + nodeOutputs = new Object[nd.getChildren().size()]; + int i = 0; + for (Node child : nd.getChildren()) { + nodeOutputs[i++] = retMap.get(child); + } + } + + Object retVal = dispatcher.dispatch(nd, ndStack, nodeOutputs); + retMap.put(nd, retVal); + return (T) retVal; + } + + /** + * starting point for walking. + * + * @throws SemanticException + */ + public void startWalking(Collection startNodes, + HashMap nodeOutput) throws SemanticException { + toWalk.addAll(startNodes); + while (toWalk.size() > 0) { + Node nd = toWalk.remove(0); + walk(nd); + // Some walkers extending DefaultGraphWalker e.g. ForwardWalker + // do not use opQueue and rely uniquely in the toWalk structure, + // thus we store the results produced by the dispatcher here + // TODO: rewriting the logic of those walkers to use opQueue + if (nodeOutput != null && getDispatchedList().contains(nd)) { + nodeOutput.put(nd, retMap.get(nd)); + } + } + + // Store the results produced by the dispatcher + while (!opQueue.isEmpty()) { + Node node = opQueue.poll(); + if (nodeOutput != null && getDispatchedList().contains(node)) { + nodeOutput.put(node, retMap.get(node)); + } + } + } + + /** + * walk the current operator and its descendants. + * + * @param nd + * current operator in the graph + * @throws SemanticException + */ + protected void walk(Node nd) throws SemanticException { + // Push the node in the stack + opStack.push(nd); + Node prevVisitedNd = null; + + // While there are still nodes to dispatch... 
+ while (!opStack.empty()) { + Node node = opStack.peek(); + boolean dispatchNode = false; + + if(node.getChildren() == null) { + dispatchNode = true; + } else if(prevVisitedNd != null && prevVisitedNd == node.getChildren().get(0)) { + // all children of node have been processed + dispatchNode = true; + } else { + for (Node childNode : node.getChildren()) { + opStack.push(childNode); + } + } + + if(dispatchNode) { + dispatch(node, opStack); + opQueue.add(node); + opStack.pop(); + } + prevVisitedNd = node; + } // end while + } + +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index f612cd2535..0c6d442173 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.pcr; +import org.apache.hadoop.hive.ql.lib.*; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import java.util.ArrayList; @@ -28,15 +29,6 @@ import java.util.Stack; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; @@ -588,7 +580,7 @@ public static NodeInfoWrapper walkExprTree( // rule and passes the context along Dispatcher disp = new 
DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, pprCtx); - GraphWalker egw = new DefaultGraphWalker(disp); + GraphWalker egw = new PostOrderWalker(disp); List startNodes = new ArrayList(); startNodes.add(pred); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index 1c662d7cfb..487260905a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -29,15 +29,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.TypeRule; +import org.apache.hadoop.hive.ql.lib.*; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -346,7 +338,7 @@ public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, exprContext); - GraphWalker egw = new DefaultGraphWalker(disp); + GraphWalker egw = new PostOrderWalker(disp); List startNodes = new ArrayList(); List clonedPreds = new ArrayList();