diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
index 8400ee8..16b055b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ASTNode.java
@@ -19,12 +19,15 @@
 package org.apache.hadoop.hive.ql.parse;
 
 import java.io.Serializable;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Deque;
 import java.util.List;
 
 import org.antlr.runtime.Token;
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.hive.ql.lib.Node;
 
 /**
@@ -38,6 +41,7 @@
   private transient int endIndx = -1;
   private transient ASTNode rootNode;
   private transient boolean isValidASTStr;
+  private transient boolean visited = false; // traversal marker for dump()/toStringTree(); both clear it again before popping the node
 
   public ASTNode() {
   }
@@ -92,6 +96,49 @@ public String getName() {
   }
 
   /**
+   * For every node in this subtree, make sure its start/stop tokens
+   * are set. Walk depth first, visit bottom up. Only updates nodes
+   * with at least one token index < 0.
+   *
+   * In contrast to the method in the parent class, this method is
+   * iterative.
+   */
+  @Override
+  public void setUnknownTokenBoundaries() {
+    Deque<ASTNode> stack1 = new ArrayDeque<ASTNode>();
+    Deque<ASTNode> stack2 = new ArrayDeque<ASTNode>();
+    stack1.push(this);
+
+    while (!stack1.isEmpty()) {
+      ASTNode next = stack1.pop();
+      stack2.push(next); // popping stack2 later yields each node after all of its descendants
+
+      if (next.children != null) {
+        for (int i = next.children.size() - 1; i >= 0 ; i--) {
+          stack1.push((ASTNode)next.children.get(i));
+        }
+      }
+    }
+
+    while (!stack2.isEmpty()) {
+      ASTNode next = stack2.pop();
+
+      if (next.children == null) {
+        if (next.startIndex < 0 || next.stopIndex < 0) {
+          next.startIndex = next.stopIndex = next.token.getTokenIndex();
+        }
+      } else if (next.startIndex >= 0 && next.stopIndex >= 0) {
+        continue;
+      } else if (next.children.size() > 0) {
+        ASTNode firstChild = (ASTNode)next.children.get(0);
+        ASTNode lastChild = (ASTNode)next.children.get(next.children.size()-1);
+        next.startIndex = firstChild.getTokenStartIndex();
+        next.stopIndex = lastChild.getTokenStopIndex();
+      }
+    }
+  }
+
+  /**
    * @return information about the object from which this ASTNode originated, or
    *         null if this ASTNode was not expanded from an object reference
    */
@@ -109,27 +156,38 @@ public void setOrigin(ASTNodeOrigin origin) {
 
   public String dump() {
     StringBuilder sb = new StringBuilder("\n");
-    dump(sb, "");
+    dump(sb);
     return sb.toString();
   }
 
-  private StringBuilder dump(StringBuilder sb, String ws) {
-    sb.append(ws);
-    sb.append(toString());
-    sb.append("\n");
-
-    ArrayList<Node> children = getChildren();
-    if (children != null) {
-      for (Node node : getChildren()) {
-        if (node instanceof ASTNode) {
-          ((ASTNode) node).dump(sb, ws + " ");
-        } else {
-          sb.append(ws);
-          sb.append(" NON-ASTNODE!!");
-          sb.append("\n");
+  private StringBuilder dump(StringBuilder sb) {
+    Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
+    stack.push(this);
+    int tabLength = 0;
+
+    while (!stack.isEmpty()) {
+      ASTNode next = stack.peek();
+
+      if (!next.visited) {
+        sb.append(StringUtils.repeat(" ", tabLength * 3));
+        sb.append(next.toString());
+        sb.append("\n");
+
+        if (next.children != null) {
+          for (int i = next.children.size() - 1 ; i >= 0 ; i--) {
+            stack.push((ASTNode)next.children.get(i)); // NOTE(review): removed code printed "NON-ASTNODE!!" for non-ASTNode children; this cast throws instead
+          }
         }
+
+        tabLength++;
+        next.visited = true; // peeked again once everything pushed above it has been popped
+      } else {
+        tabLength--;
+        next.visited = false;
+        stack.pop();
       }
     }
+
     return sb;
   }
 
@@ -238,34 +296,55 @@ public String toStringTree() {
   }
 
   private String toStringTree(ASTNode rootNode) {
-    this.rootNode = rootNode;
-    startIndx = rootNode.getMemoizedStringLen();
-    // Leaf node
-    String str;
-    if ( children==null || children.size()==0 ) {
-      str = this.toString();
-      rootNode.addtoMemoizedString(this.getType() != HiveParser.StringLiteral ? str.toLowerCase() : str);
-      endIndx = rootNode.getMemoizedStringLen();
-      return this.getType() != HiveParser.StringLiteral ? str.toLowerCase() : str;
-    }
+    Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
+    stack.push(this);
+
+    while (!stack.isEmpty()) {
+      ASTNode next = stack.peek();
+      if (!next.visited) {
+        if (next.parent != null && next.parent.getChildCount() > 1 &&
+                next != next.parent.getChild(0)) {
+          rootNode.addtoMemoizedString(" ");
+        }
 
-    if ( !isNil() ) {
-      rootNode.addtoMemoizedString("(");
-      str = this.toString();
-      rootNode.addtoMemoizedString((this.getType() == HiveParser.StringLiteral || null == str) ? str : str.toLowerCase());
-      rootNode.addtoMemoizedString(" ");
-    }
-    for (int i = 0; children!=null && i < children.size(); i++) {
-      ASTNode t = (ASTNode)children.get(i);
-      if ( i>0 ) {
-        rootNode.addtoMemoizedString(" ");
+        next.rootNode = rootNode;
+        next.startIndx = rootNode.getMemoizedStringLen();
+
+        // Leaf
+        if (next.children == null || next.children.size() == 0) {
+          String str = next.toString();
+          rootNode.addtoMemoizedString(next.getType() != HiveParser.StringLiteral ? str.toLowerCase() : str);
+          next.endIndx = rootNode.getMemoizedStringLen();
+          stack.pop();
+          continue;
+        }
+
+        if ( !next.isNil() ) {
+          rootNode.addtoMemoizedString("(");
+          String str = next.toString();
+          rootNode.addtoMemoizedString((next.getType() == HiveParser.StringLiteral || null == str) ? str : str.toLowerCase());
+          rootNode.addtoMemoizedString(" ");
+        }
+
+        if (next.children != null) {
+          for (int i = next.children.size() - 1 ; i >= 0 ; i--) {
+            stack.push((ASTNode)next.children.get(i));
+          }
+        }
+
+        next.visited = true;
+      } else {
+        if ( !next.isNil() ) {
+          rootNode.addtoMemoizedString(")");
+        }
+        next.endIndx = rootNode.getMemoizedStringLen();
+        next.visited = false;
+        stack.pop();
       }
-      t.toStringTree(rootNode);
-    }
-    if ( !isNil() ) {
-      rootNode.addtoMemoizedString(")");
+
     }
-    endIndx = rootNode.getMemoizedStringLen();
+
     return rootNode.getMemoizedSubString(startIndx, endIndx);
   }
+
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c38699d..318f207 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -24,10 +24,10 @@
 import java.io.IOException;
 import java.io.Serializable;
 import java.security.AccessControlException;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
+import java.util.Deque;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -65,7 +65,6 @@
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
-import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.QueryProperties;
 import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
@@ -188,7 +187,6 @@
 import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;
-import org.apache.hadoop.hive.ql.stats.StatsFactory;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
@@ -11184,98 +11182,103 @@ private void processPositionAlias(ASTNode ast) throws SemanticException {
       isByPos = true;
     }
 
-    if (ast.getChildCount() == 0) {
-      return;
-    }
+    Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
+    stack.push(ast);
 
-    boolean isAllCol;
-    ASTNode selectNode = null;
-    ASTNode groupbyNode = null;
-    ASTNode orderbyNode = null;
-
-    // get node type
-    int child_count = ast.getChildCount();
-    for (int child_pos = 0; child_pos < child_count; ++child_pos) {
-      ASTNode node = (ASTNode) ast.getChild(child_pos);
-      int type = node.getToken().getType();
-      if (type == HiveParser.TOK_SELECT) {
-        selectNode = node;
-      } else if (type == HiveParser.TOK_GROUPBY) {
-        groupbyNode = node;
-      } else if (type == HiveParser.TOK_ORDERBY) {
-        orderbyNode = node;
-      }
-    }
-
-    if (selectNode != null) {
-      int selectExpCnt = selectNode.getChildCount();
-
-      // replace each of the position alias in GROUPBY with the actual column name
-      if (groupbyNode != null) {
-        for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) {
-          ASTNode node = (ASTNode) groupbyNode.getChild(child_pos);
-          if (node.getToken().getType() == HiveParser.Number) {
-            if (isByPos) {
-              int pos = Integer.parseInt(node.getText());
-              if (pos > 0 && pos <= selectExpCnt) {
-                groupbyNode.setChild(child_pos,
-                    selectNode.getChild(pos - 1).getChild(0));
-              } else {
-                throw new SemanticException(
-                    ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg(
-                        "Position alias: " + pos + " does not exist\n" +
-                            "The Select List is indexed from 1 to " + selectExpCnt));
-              }
-            } else {
-              warn("Using constant number " + node.getText() +
-                  " in group by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored.");
-            }
-          }
-        }
+    while (!stack.isEmpty()) {
+      ASTNode next = stack.pop();
+
+      if (next.getChildCount() == 0) {
+        continue;
       }
 
-      // replace each of the position alias in ORDERBY with the actual column name
-      if (orderbyNode != null) {
-        isAllCol = false;
-        for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) {
-          ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0);
-          if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
-            isAllCol = true;
-          }
+      boolean isAllCol;
+      ASTNode selectNode = null;
+      ASTNode groupbyNode = null;
+      ASTNode orderbyNode = null;
+
+      // get node type
+      int child_count = next.getChildCount();
+      for (int child_pos = 0; child_pos < child_count; ++child_pos) {
+        ASTNode node = (ASTNode) next.getChild(child_pos);
+        int type = node.getToken().getType();
+        if (type == HiveParser.TOK_SELECT) {
+          selectNode = node;
+        } else if (type == HiveParser.TOK_GROUPBY) {
+          groupbyNode = node;
+        } else if (type == HiveParser.TOK_ORDERBY) {
+          orderbyNode = node;
         }
-        for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) {
-          ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos);
-          ASTNode node = (ASTNode) colNode.getChild(0);
-          if (node.getToken().getType() == HiveParser.Number) {
-            if( isByPos ) {
-              if (!isAllCol) {
+      }
+
+      if (selectNode != null) {
+        int selectExpCnt = selectNode.getChildCount();
+
+        // replace each of the position alias in GROUPBY with the actual column name
+        if (groupbyNode != null) {
+          for (int child_pos = 0; child_pos < groupbyNode.getChildCount(); ++child_pos) {
+            ASTNode node = (ASTNode) groupbyNode.getChild(child_pos);
+            if (node.getToken().getType() == HiveParser.Number) {
+              if (isByPos) {
                 int pos = Integer.parseInt(node.getText());
                 if (pos > 0 && pos <= selectExpCnt) {
-                  colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0));
+                  groupbyNode.setChild(child_pos,
+                      selectNode.getChild(pos - 1).getChild(0));
                 } else {
                   throw new SemanticException(
-                      ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg(
+                      ErrorMsg.INVALID_POSITION_ALIAS_IN_GROUPBY.getMsg(
                           "Position alias: " + pos + " does not exist\n" +
                               "The Select List is indexed from 1 to " + selectExpCnt));
                 }
               } else {
-                throw new SemanticException(
-                    ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg());
+                warn("Using constant number " + node.getText() +
+                    " in group by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored.");
+              }
+            }
+          }
+        }
+
+        // replace each of the position alias in ORDERBY with the actual column name
+        if (orderbyNode != null) {
+          isAllCol = false;
+          for (int child_pos = 0; child_pos < selectNode.getChildCount(); ++child_pos) {
+            ASTNode node = (ASTNode) selectNode.getChild(child_pos).getChild(0);
+            if (node.getToken().getType() == HiveParser.TOK_ALLCOLREF) {
+              isAllCol = true;
+            }
+          }
+          for (int child_pos = 0; child_pos < orderbyNode.getChildCount(); ++child_pos) {
+            ASTNode colNode = (ASTNode) orderbyNode.getChild(child_pos);
+            ASTNode node = (ASTNode) colNode.getChild(0);
+            if (node.getToken().getType() == HiveParser.Number) {
+              if( isByPos ) {
+                if (!isAllCol) {
+                  int pos = Integer.parseInt(node.getText());
+                  if (pos > 0 && pos <= selectExpCnt) {
+                    colNode.setChild(0, selectNode.getChild(pos - 1).getChild(0));
+                  } else {
+                    throw new SemanticException(
+                        ErrorMsg.INVALID_POSITION_ALIAS_IN_ORDERBY.getMsg(
+                            "Position alias: " + pos + " does not exist\n" +
+                                "The Select List is indexed from 1 to " + selectExpCnt));
+                  }
+                } else {
+                  throw new SemanticException(
+                      ErrorMsg.NO_SUPPORTED_ORDERBY_ALLCOLREF_POS.getMsg());
+                }
+              } else { //if not using position alias and it is a number.
+                warn("Using constant number " + node.getText() +
+                    " in order by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored.");
               }
-            } else { //if not using position alias and it is a number.
-              warn("Using constant number " + node.getText() +
-                  " in order by. If you try to use position alias when hive.groupby.orderby.position.alias is false, the position alias will be ignored.");
             }
           }
         }
       }
-    }
 
-    // Recursively process through the children ASTNodes
-    for (int child_pos = 0; child_pos < child_count; ++child_pos) {
-      processPositionAlias((ASTNode) ast.getChild(child_pos));
+      for (int i = next.getChildren().size() - 1; i >= 0; i--) { // push in reverse so the child at index 0 is popped first
+        stack.push((ASTNode)next.getChildren().get(i));
+      }
     }
-    return;
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
index 362a285..bd771f9 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java
@@ -18,7 +18,9 @@
 
 package org.apache.hadoop.hive.ql.parse;
 
+import java.util.ArrayDeque;
 import java.util.ArrayList;
+import java.util.Deque;
 import java.util.List;
 import java.util.Map;
 
@@ -196,14 +198,21 @@ ASTNode remove() throws SemanticException {
   }
 
   private static void findSubQueries(ASTNode node, List<ASTNode> subQueries) {
-    switch(node.getType()) {
-    case HiveParser.TOK_SUBQUERY_EXPR:
-      subQueries.add(node);
-      break;
-    default:
-      int childCount = node.getChildCount();
-      for(int i=0; i < childCount; i++) {
-        findSubQueries((ASTNode) node.getChild(i), subQueries);
+    Deque<ASTNode> stack = new ArrayDeque<ASTNode>();
+    stack.push(node);
+
+    while (!stack.isEmpty()) {
+      ASTNode next = stack.pop();
+
+      switch(next.getType()) {
+      case HiveParser.TOK_SUBQUERY_EXPR:
+        subQueries.add(next);
+        break;
+      default:
+        int childCount = next.getChildCount();
+        for(int i = childCount - 1; i >= 0; i--) { // push in reverse so children are popped in index order, as the recursion visited them
+          stack.push((ASTNode) next.getChild(i));
+        }
       }
     }
   }