diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java index 41607aeec5..938d671207 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/ExpressionWalker.java @@ -18,11 +18,16 @@ package org.apache.hadoop.hive.ql.lib; -import org.apache.hadoop.hive.ql.parse.ASTNode; -import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.hive.ql.parse.SemanticException; -public class ExpressionWalker extends DefaultGraphWalker { +import java.util.Stack; + +/** + * class for traversing tree + * This class assumes that the given node represents TREE and not GRAPH + * i.e. there is only single path to reach a node + */ +public class ExpressionWalker extends DefaultGraphWalker{ /** * Constructor. @@ -35,63 +40,69 @@ public ExpressionWalker(Dispatcher disp) { } - /** - * We should bypass subquery since we have already processed and created logical plan - * (in genLogicalPlan) for subquery at this point. - * SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc. - */ - private boolean shouldByPass(Node childNode, Node parentNode) { - if(parentNode instanceof ASTNode - && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) { - ASTNode parentOp = (ASTNode)parentNode; - //subquery either in WHERE IN form OR WHERE EXISTS form - //in first case LHS should not be bypassed - assert(parentOp.getChildCount() == 2 || parentOp.getChildCount()==3); - if(parentOp.getChildCount() == 3 && (ASTNode)childNode == parentOp.getChild(2)) { - return false; - } - return true; + private class NodeLabeled { + private Node nd; + private int currChildIdx; + + NodeLabeled(Node nd) { + this.nd = nd; + this.currChildIdx = -1; + } + + public void incrementChildIdx() { + this.currChildIdx++; + } + + public int getCurrChildIdx() { + return this.currChildIdx; } + + public Node getNd() { + return this.nd; + } + } + + protected boolean shouldByPass(Node childNode, Node parentNode) { return false; } + /** * walk the current operator and its descendants. * * @param nd - * current operator in the graph + * current operator in the tree * @throws SemanticException */ protected void walk(Node nd) throws SemanticException { - // Push the node in the stack + Stack traversalStack = new Stack<>(); + traversalStack.push(new NodeLabeled(nd)); + opStack.push(nd); - // While there are still nodes to dispatch... - while (!opStack.empty()) { - Node node = opStack.peek(); + while(!traversalStack.isEmpty()) { + NodeLabeled currLabeledNode = traversalStack.peek(); + Node currNode = currLabeledNode.getNd(); + int currIdx = currLabeledNode.getCurrChildIdx(); - if (node.getChildren() == null || - getDispatchedList().containsAll(node.getChildren())) { - // Dispatch current node - if (!getDispatchedList().contains(node)) { - dispatch(node, opStack); - opQueue.add(node); + if(currNode.getChildren() != null && currNode.getChildren().size() > currIdx + 1) { + Node nextChild = currNode.getChildren().get(currIdx+1); + //check if this node should be skipped and not dispatched + if(shouldByPass(nextChild, currNode)) { + retMap.put(nextChild, null); + currLabeledNode.incrementChildIdx(); + continue; } + traversalStack.push(new NodeLabeled(nextChild)); + opStack.push(nextChild); + currLabeledNode.incrementChildIdx(); + } else { + // dispatch the node + dispatch(currNode, opStack); + opQueue.add(currNode); opStack.pop(); - continue; + traversalStack.pop(); } - - // Add a single child and restart the loop - for (Node childNode : node.getChildren()) { - if (!getDispatchedList().contains(childNode)) { - if(shouldByPass(childNode, node)) { - retMap.put(childNode, null); - } else { - opStack.push(childNode); - } - break; - } - } - } // end while + } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java new file mode 100644 index 0000000000..75f09e4617 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/SubqueryExpressionWalker.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.lib; + +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.HiveParser; + +public class SubqueryExpressionWalker extends ExpressionWalker{ + + /** + * Constructor. + * + * @param disp + * dispatcher to call for each op encountered + */ + public SubqueryExpressionWalker(Dispatcher disp) { + super(disp); + } + + + /** + * We should bypass subquery since we have already processed and created logical plan + * (in genLogicalPlan) for subquery at this point. + * SubQueryExprProcessor will use generated plan and creates appropriate ExprNodeSubQueryDesc. + */ + protected boolean shouldByPass(Node childNode, Node parentNode) { + if(parentNode instanceof ASTNode + && ((ASTNode)parentNode).getType() == HiveParser.TOK_SUBQUERY_EXPR) { + ASTNode parentOp = (ASTNode)parentNode; + //subquery either in WHERE IN form OR WHERE EXISTS form + //in first case LHS should not be bypassed + assert(parentOp.getChildCount() == 2 || parentOp.getChildCount()==3); + if(parentOp.getChildCount() == 3 && (ASTNode)childNode == parentOp.getChild(2)) { + return false; + } + return true; + } + return false; + } + +} + diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java index f612cd2535..4dc5f0bffc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/pcr/PcrExprProcFactory.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.optimizer.pcr; +import org.apache.hadoop.hive.ql.lib.*; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; import java.util.ArrayList; @@ -28,15 +29,6 @@ import java.util.Stack; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; @@ -588,7 +580,7 @@ public static NodeInfoWrapper walkExprTree( // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, pprCtx); - GraphWalker egw = new DefaultGraphWalker(disp); + GraphWalker egw = new ExpressionWalker(disp); List startNodes = new ArrayList(); startNodes.add(pred); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index a4c1b9ab38..52198139e2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -47,15 +47,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.ExpressionWalker; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.lib.*; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory; @@ -236,7 +228,7 @@ public static ExprNodeDesc processGByExpr(Node nd, Object procCtx) // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(tf.getDefaultExprProcessor(), opRules, tcCtx); - GraphWalker ogw = new ExpressionWalker(disp); + GraphWalker ogw = new SubqueryExpressionWalker(disp); // Create a list of top nodes ArrayList topNodes = Lists.newArrayList(expr); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java index 1c662d7cfb..a61b02560c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/ppd/ExprWalkerProcFactory.java @@ -29,15 +29,7 @@ import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.RowSchema; -import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; -import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; -import org.apache.hadoop.hive.ql.lib.Dispatcher; -import org.apache.hadoop.hive.ql.lib.GraphWalker; -import org.apache.hadoop.hive.ql.lib.Node; -import org.apache.hadoop.hive.ql.lib.NodeProcessor; -import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; -import org.apache.hadoop.hive.ql.lib.Rule; -import org.apache.hadoop.hive.ql.lib.TypeRule; +import org.apache.hadoop.hive.ql.lib.*; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -346,7 +338,7 @@ public static ExprWalkerInfo extractPushdownPreds(OpWalkerInfo opContext, // rule and passes the context along Dispatcher disp = new DefaultRuleDispatcher(getDefaultExprProcessor(), exprRules, exprContext); - GraphWalker egw = new DefaultGraphWalker(disp); + GraphWalker egw = new ExpressionWalker(disp); List startNodes = new ArrayList(); List clonedPreds = new ArrayList();