diff --git ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java index d22009a..bad4f48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java +++ ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java @@ -363,6 +363,8 @@ INVALID_BIGTABLE_MAPJOIN(10246, "{0} table chosen for streaming is not valid", true), MISSING_OVER_CLAUSE(10247, "Missing over clause for function : "), PARTITION_SPEC_TYPE_MISMATCH(10248, "Cannot add partition column {0} of type {1} as it cannot be converted to type {2}", true), + UNSUPPORTED_SUBQUERY_EXPRESSION(10249, "Unsupported SubQuery Expression"), + INVALID_SUBQUERY_EXPRESSION(10250, "Invalid SubQuery expression"), SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."), SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. " diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index b575e22..dc26b0f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -99,7 +99,6 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.Context; -import org.apache.hadoop.hive.ql.Driver; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.QueryPlan; import org.apache.hadoop.hive.ql.exec.mr.ExecDriver; @@ -126,6 +125,8 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.parse.ASTNode; +import org.apache.hadoop.hive.ql.parse.ASTNodeOrigin; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.BaseWork; import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx; @@ -694,6 +695,30 @@ public void write(Kryo kryo, Output 
output, CommonToken token) { output.writeString(token.getText()); } } + + private static class ASTNodeOriginTokenSerializer extends com.esotericsoftware.kryo.Serializer { + @Override + public ASTNodeOrigin read(Kryo kryo, Input input, Class clazz) { + + String objType = input.readString(); + String objName = input.readString(); + String objDefn = input.readString(); + String usageAlias = input.readString(); + ASTNode usageNode = kryo.readObjectOrNull(input, ASTNode.class); + + return new ASTNodeOrigin(objType, objName, objDefn, usageAlias, usageNode); + } + + @Override + public void write(Kryo kryo, Output output, ASTNodeOrigin node) { + output.writeString(node.getObjectType()); + output.writeString(node.getObjectName()); + output.writeString(node.getObjectDefinition()); + output.writeString(node.getUsageAlias()); + kryo.writeObjectOrNull(output, node.getUsageNode(), ASTNode.class); + } + } + private static void serializePlan(Object plan, OutputStream out, Configuration conf, boolean cloningPlan) { PerfLogger perfLogger = PerfLogger.getPerfLogger(); perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SERIALIZE_PLAN); @@ -855,6 +880,7 @@ protected synchronized Kryo initialValue() { kryo.setClassLoader(Thread.currentThread().getContextClassLoader()); kryo.register(CommonToken.class, new CommonTokenSerializer()); kryo.register(java.sql.Date.class, new SqlDateSerializer()); + kryo.register(ASTNodeOrigin.class, new ASTNodeOriginTokenSerializer()); return kryo; }; }; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 1f7b247..777bd88 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -301,6 +301,10 @@ TOK_WINDOWVALUES; TOK_WINDOWRANGE; TOK_IGNOREPROTECTION; TOK_EXCHANGEPARTITION; +TOK_SUBQUERY_EXPR; +TOK_SUBQUERY_OP; +TOK_SUBQUERY_OP_NOTIN; +TOK_SUBQUERY_OP_NOTEXISTS; } diff --git 
ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 5bdf477..5a23751 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -371,6 +371,11 @@ precedenceEqualOperator precedenceEqualNegatableOperator | EQUAL | EQUAL_NS | NOTEQUAL | LESSTHANOREQUALTO | LESSTHAN | GREATERTHANOREQUALTO | GREATERTHAN ; +subQueryExpression + : + LPAREN! selectStatement RPAREN! + ; + precedenceEqualExpression : (left=precedenceBitwiseOrExpression -> $left) @@ -379,8 +384,12 @@ precedenceEqualExpression -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpression $notExpr)) | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression) -> ^(precedenceEqualOperator $precedenceEqualExpression $equalExpr) + | (KW_NOT KW_IN LPAREN KW_SELECT)=> (KW_NOT KW_IN subQueryExpression) + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP TOK_SUBQUERY_OP_NOTIN) subQueryExpression $left) | (KW_NOT KW_IN expressions) -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)) + | (KW_IN LPAREN KW_SELECT)=> (KW_IN subQueryExpression) + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $left) | (KW_IN expressions) -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions) | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) @@ -388,6 +397,7 @@ precedenceEqualExpression | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) -> ^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max) )* + | (KW_EXISTS LPAREN KW_SELECT)=> (KW_EXISTS subQueryExpression) -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_EXISTS) subQueryExpression) ; expressions diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java new file mode 100644 index 
0000000..4836202 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -0,0 +1,596 @@ +package org.apache.hadoop.hive.ql.parse; + +import java.util.ArrayList; +import java.util.List; +import java.util.Stack; + +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; + +public class QBSubQuery { + + public static enum SubQueryOperatorType { + EXISTS, + NOT_EXISTS, + IN, + NOT_IN; + + public static SubQueryOperatorType get(ASTNode opNode) throws SemanticException { + switch(opNode.getType()) { + case HiveParser.KW_EXISTS: + return EXISTS; + case HiveParser.TOK_SUBQUERY_OP_NOTEXISTS: + return NOT_EXISTS; + case HiveParser.KW_IN: + return IN; + case HiveParser.TOK_SUBQUERY_OP_NOTIN: + return NOT_IN; + default: + throw new SemanticException(SemanticAnalyzer.generateErrorMessage(opNode, + "Operator not supported in SubQuery use.")); + } + } + } + + public static class SubQueryOperator { + private final ASTNode ast; + private final SubQueryOperatorType type; + + public SubQueryOperator(ASTNode ast, SubQueryOperatorType type) { + super(); + this.ast = ast; + this.type = type; + } + + public ASTNode getAst() { + return ast; + } + + public SubQueryOperatorType getType() { + return type; + } + + } + + /* + * An expression is either the left/right side of an Equality predicate in the SubQuery where + * clause; or it is the entire conjunct. For example, if the Where Clause for a SubQuery is: + * where R1.X = R2.Y and R2.Z > 7 + * Then the expressions analyzed are R1.X, R2.Y (the left and right sides of the Equality + * predicate); and R2.Z > 7.
+ * + * The ExprType tracks whether the expr: + * - has a reference to a SubQuery table source + * - has a reference to Outer(parent) Query table source + */ + static enum ExprType { + REFERS_NONE(false, false) { + @Override + public ExprType combine(ExprType other) { + return other; + } + }, + REFERS_PARENT(true, false) { + @Override + public ExprType combine(ExprType other) { + switch(other) { + case REFERS_SUBQUERY: + case REFERS_BOTH: + return REFERS_BOTH; + default: + return this; + } + } + }, + REFERS_SUBQUERY(false, true) { + @Override + public ExprType combine(ExprType other) { + switch(other) { + case REFERS_PARENT: + case REFERS_BOTH: + return REFERS_BOTH; + default: + return this; + } + } + }, + REFERS_BOTH(true,true) { + @Override + public ExprType combine(ExprType other) { + return this; + } + }; + + final boolean refersParent; + final boolean refersSubQuery; + + ExprType(boolean refersParent, boolean refersSubQuery) { + this.refersParent = refersParent; + this.refersSubQuery = refersSubQuery; + } + + public boolean refersParent() { + return refersParent; + } + public boolean refersSubQuery() { + return refersSubQuery; + } + public abstract ExprType combine(ExprType other); + } + + static class Conjunct { + private final ASTNode leftExpr; + private final ASTNode rightExpr; + private final ExprType leftExprType; + private final ExprType rightExprType; + + public Conjunct(ASTNode leftExpr, ASTNode rightExpr, ExprType leftExprType, + ExprType rightExprType) { + super(); + this.leftExpr = leftExpr; + this.rightExpr = rightExpr; + this.leftExprType = leftExprType; + this.rightExprType = rightExprType; + } + public ASTNode getLeftExpr() { + return leftExpr; + } + public ASTNode getRightExpr() { + return rightExpr; + } + public ExprType getLeftExprType() { + return leftExprType; + } + public ExprType getRightExprType() { + return rightExprType; + } + + boolean eitherSideRefersBoth() { + if ( leftExprType == ExprType.REFERS_BOTH ) { + return true; + } else if 
( rightExpr != null ) { + return rightExprType == ExprType.REFERS_BOTH; + } + return false; + } + + boolean isCorrelated() { + if ( rightExpr != null ) { + return leftExprType.combine(rightExprType) == ExprType.REFERS_BOTH; + } + return false; + } + + boolean refersOuterOnly() { + if ( rightExpr == null ) { + return leftExprType == ExprType.REFERS_PARENT; + } + return leftExprType.combine(rightExprType) == ExprType.REFERS_PARENT; + } + } + + class ConjunctAnalyzer { + RowResolver parentQueryRR; + NodeProcessor defaultExprProcessor; + Stack stack; + + ConjunctAnalyzer(RowResolver parentQueryRR) { + this.parentQueryRR = parentQueryRR; + defaultExprProcessor = new DefaultExprProcessor(); + stack = new Stack(); + } + + /* + * 1. On encountering a DOT, we attempt to resolve the leftmost name + * to the Parent Query. + * 2. An unqualified name is assumed to be a SubQuery reference. + * We don't attempt to resolve this to the Parent; because + * we require all Parent column references to be qualified. + * 3. All other expressions have a Type based on their children. + * An Expr w/o children is assumed to refer to neither. + */ + private ExprType analyzeExpr(ASTNode expr) { + ExprNodeDesc exprNode; + if ( expr.getType() == HiveParser.DOT) { + ASTNode dot = firstDot(expr); + exprNode = resolveDot(dot); + if ( exprNode != null ) { + return ExprType.REFERS_PARENT; + } + return ExprType.REFERS_SUBQUERY; + } else if ( expr.getType() == HiveParser.TOK_TABLE_OR_COL ) { + return ExprType.REFERS_SUBQUERY; + } else { + ExprType exprType = ExprType.REFERS_NONE; + int cnt = expr.getChildCount(); + for(int i=0; i < cnt; i++) { + ASTNode child = (ASTNode) expr.getChild(i); + exprType = exprType.combine(analyzeExpr(child)); + } + return exprType; + } + } + + /* + * 1. The only correlation operator we check for is EQUAL; because that is + * the one for which we can do a Algebraic transformation. + * 2. 
For expressions that are not an EQUAL predicate, we treat them as conjuncts + * having only 1 side. These should only contain references to the SubQuery + * table sources. + * 3. For expressions that are an EQUAL predicate; we analyze each side and let the + * left and right exprs in the Conjunct object. + * + * @return Conjunct contains details on the left and right side of the conjunct expression. + */ + Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException { + int type = conjunct.getType(); + + if ( type == HiveParser.EQUAL ) { + ASTNode left = (ASTNode) conjunct.getChild(0); + ASTNode right = (ASTNode) conjunct.getChild(1); + ExprType leftType = analyzeExpr(left); + ExprType rightType = analyzeExpr(right); + + return new Conjunct(left, right, leftType, rightType); + } else { + ExprType sqExprType = analyzeExpr(conjunct); + return new Conjunct(conjunct, null, sqExprType, null); + } + } + + /* + * Try to resolve a qualified name as a column reference on the Parent Query's RowResolver. + * Apply this logic on the leftmost(first) dot in an AST tree. + */ + protected ExprNodeDesc resolveDot(ASTNode node) { + try { + TypeCheckCtx tcCtx = new TypeCheckCtx(parentQueryRR); + String str = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(1).getText()); + ExprNodeDesc idDesc = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); + return (ExprNodeDesc) defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc); + } catch(SemanticException se) { + return null; + } + } + + /* + * We want to resolve the leftmost name to the Parent Query's RR. + * Hence we do a left walk down the AST, until we reach the bottom most DOT. 
+ */ + protected ASTNode firstDot(ASTNode dot) { + ASTNode firstChild = (ASTNode) dot.getChild(0); + if ( firstChild != null && firstChild.getType() == HiveParser.DOT) { + return firstDot(firstChild); + } + return dot; + } + + } + + private final String outerQueryId; + private final int sqIdx; + private final String alias; + private final ASTNode subQueryAST; + private final ASTNode parentQueryExpression; + private final SubQueryOperator operator; + private boolean containsAggregationExprs; + private boolean hasCorrelation; + private ASTNode joinConditionAST; + private JoinType joinType; + private ASTNode postJoinConditionAST; + private int numCorrExprsinSQ; + private List subQueryJoinAliasExprs; + private final ASTNodeOrigin originalSQASTOrigin; + + public QBSubQuery(String outerQueryId, + int sqIdx, + ASTNode subQueryAST, + ASTNode parentQueryExpression, + SubQueryOperator operator, + ASTNode originalSQAST, + Context ctx) { + super(); + this.subQueryAST = subQueryAST; + this.parentQueryExpression = parentQueryExpression; + this.operator = operator; + this.outerQueryId = outerQueryId; + this.sqIdx = sqIdx; + this.alias = "sq_" + this.sqIdx; + this.numCorrExprsinSQ = 0; + String s = ctx.getTokenRewriteStream().toString(originalSQAST.getTokenStartIndex(), originalSQAST.getTokenStopIndex()); + originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST); + } + + public ASTNode getSubQueryAST() { + return subQueryAST; + } + public ASTNode getOuterQueryExpression() { + return parentQueryExpression; + } + public SubQueryOperator getOperator() { + return operator; + } + + void validateAndRewritePlan(RowResolver outerQueryRR) throws SemanticException { + + ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); + + int selectExprStart = 0; + if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) { + selectExprStart = 1; + } + + /* + * Check.5.h :: For In and Not In the SubQuery must implicitly or + * explicitly only 
contain one select item. + */ + if ( operator.getType() != SubQueryOperatorType.EXISTS && + operator.getType() != SubQueryOperatorType.NOT_EXISTS && + selectClause.getChildCount() - selectExprStart > 1 ) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + subQueryAST, "SubQuery can contain only 1 item in Select List.")); + } + + containsAggregationExprs = false; + boolean containsWindowing = false; + for(int i= selectExprStart; i < selectClause.getChildCount(); i++ ) { + + ASTNode selectItem = (ASTNode) selectClause.getChild(i); + int r = SubQueryUtils.checkAggOrWindowing(selectItem); + + containsWindowing = containsWindowing | ( r == 2); + containsAggregationExprs = containsAggregationExprs | ( r == 1 ); + } + + rewrite(outerQueryRR); + + SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin); + + /* + * Restriction.14.h :: Correlated Sub Queries cannot contain Windowing clauses. + */ + if ( containsWindowing && hasCorrelation ) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueryAST, "Correlated Sub Queries cannot contain Windowing clauses.")); + } + + /* + * Check.4.h :: For Exists and Not Exists, the Sub Query must + * have 1 or more correlated predicates. 
+ */ + if ( ( operator.getType() == SubQueryOperatorType.EXISTS || + operator.getType() == SubQueryOperatorType.NOT_EXISTS ) && + !hasCorrelation ) { + throw new SemanticException(ErrorMsg.INVALID_SUBQUERY_EXPRESSION.getMsg( + subQueryAST, "For Exists/Not Exists operator SubQuery must be Correlated.")); + } + + } + + private void setJoinType() { + if ( operator.getType() == SubQueryOperatorType.NOT_IN || + operator.getType() == SubQueryOperatorType.NOT_EXISTS ) { + joinType = JoinType.LEFTOUTER; + } else { + joinType = JoinType.LEFTSEMI; + } + } + + void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR) throws SemanticException { + ASTNode parentQueryJoinCond = null; + + if ( parentQueryExpression != null ) { + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( + getOuterQueryExpression(), + alias, + sqRR); + } + joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST); + setJoinType(); + + if ( joinType == JoinType.LEFTOUTER ) { + if ( operator.getType() == SubQueryOperatorType.NOT_EXISTS && hasCorrelation ) { + postJoinConditionAST = SubQueryUtils.buildPostJoinNullCheck(subQueryJoinAliasExprs); + } else if ( operator.getType() == SubQueryOperatorType.NOT_IN ) { + postJoinConditionAST = SubQueryUtils.buildOuterJoinPostCond(alias, sqRR); + } + } + + SubQueryUtils.setOriginDeep(joinConditionAST, originalSQASTOrigin); + SubQueryUtils.setOriginDeep(postJoinConditionAST, originalSQASTOrigin); + } + + ASTNode updateOuterQueryFilter(ASTNode outerQryFilter) { + if (postJoinConditionAST == null ) { + return outerQryFilter; + } else if ( outerQryFilter == null ) { + return postJoinConditionAST; + } + ASTNode node = SubQueryUtils.andAST(outerQryFilter, postJoinConditionAST); + node.setOrigin(originalSQASTOrigin); + return node; + } + + String getNextCorrExprAlias() { + return "sq_corr_" + numCorrExprsinSQ++; + } + + /* + * - If the SubQuery has no where clause, there is nothing to rewrite. 
+ * - Decompose the SubQuery where clause into a list of top-level conjuncts. + * - For each conjunct + * - Break down the conjunct into (LeftExpr, LeftExprType, RightExpr, + * RightExprType) + * - If the top level operator is an Equality Operator we will break + * it down into left and right; in all other cases there is only a + * lhs. + * - The ExprType is based on whether the Expr. refers to the Parent + * Query table sources, refers to the SubQuery sources or both. + * - We assume an unqualified Column refers to a SubQuery table source. + * This is because we require Parent Column references to be qualified + * within the SubQuery. + * - If the lhs or rhs expr refers to both Parent and SubQuery sources, + * we flag this as Unsupported. + * - If the conjunct as a whole refers only to the Parent Query sources, + * we flag this as an Error. + * - A conjunct is Correlated if the lhs refers to SubQuery sources and rhs + * refers to Parent Query sources or the reverse. + * - Say the lhs refers to SubQuery and rhs refers to Parent Query sources; the + * other case is handled analogously. + * - remove this conjunct from the SubQuery where clause. + * - for the SubQuery expression (lhs) construct a new alias + * - in the correlated predicate, replace the SubQuery + * expression (lhs) with the alias AST. + * - add this altered predicate to the Join predicate tracked by the + * QBSubQuery object. + * - add the alias AST to a list of subQueryJoinAliasExprs. This + * list is used in the case of Outer Joins to add null check + * predicates to the Outer Query's where clause. + * - Add the SubQuery expression with the alias as a SelectItem to + * the SubQuery's SelectList. + * - In case this SubQuery contains aggregation expressions, add this SubQuery + * expression to its GroupBy; add it to the front of the GroupBy. + * - If the predicate is not correlated, let it remain in the SubQuery + * where clause.
+ */ + private void rewrite(RowResolver parentQueryRR) throws SemanticException { + ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); + ASTNode whereClause = null; + if ( subQueryAST.getChild(1).getChildCount() > 2 && + subQueryAST.getChild(1).getChild(2).getType() == HiveParser.TOK_WHERE ) { + whereClause = (ASTNode) subQueryAST.getChild(1).getChild(2); + } + + if ( whereClause == null ) { + return; + } + + ASTNode searchCond = (ASTNode) whereClause.getChild(0); + List conjuncts = new ArrayList(); + SubQueryUtils.extractConjuncts(searchCond, conjuncts); + + ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR); + ASTNode sqNewSearchCond = null; + + for(ASTNode conjunctAST : conjuncts) { + Conjunct conjunct = conjunctAnalyzer.analyzeConjunct(conjunctAST); + + /* + * Restriction.11.m :: A SubQuery predicate that refers to an Outer + * Query column must be a valid Join predicate. + */ + if ( conjunct.eitherSideRefersBoth() ) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + conjunctAST, + "SubQuery expression refers to both Parent and SubQuery expressions and " + + "is not a valid join condition.")); + } + + /* + * Check.12.h :: SubQuery predicates cannot only refer to Outer Query columns. 
+ */ + if ( conjunct.refersOuterOnly() ) { + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + conjunctAST, + "SubQuery expression refers to Outer query expressions only.")); + } + + if ( conjunct.isCorrelated() ) { + hasCorrelation = true; + subQueryJoinAliasExprs = new ArrayList(); + String exprAlias = getNextCorrExprAlias(); + ASTNode sqExprAlias = SubQueryUtils.createAliasAST(exprAlias); + ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias); + + if ( conjunct.getLeftExprType().refersSubQuery() ) { + ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate(conjunctAST, sqExprForCorr, true); + joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate); + subQueryJoinAliasExprs.add(sqExprForCorr); + ASTNode selExpr = SubQueryUtils.createSelectItem(conjunct.getLeftExpr(), sqExprAlias); + selectClause.addChild(selExpr); + if ( containsAggregationExprs ) { + ASTNode gBy = getSubQueryGroupByAST(); + SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr()); + } + } else { + ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate(conjunctAST, sqExprForCorr, false); + joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate); + subQueryJoinAliasExprs.add(sqExprForCorr); + ASTNode selExpr = SubQueryUtils.createSelectItem(conjunct.getRightExpr(), sqExprAlias); + selectClause.addChild(selExpr); + if ( containsAggregationExprs ) { + ASTNode gBy = getSubQueryGroupByAST(); + SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getRightExpr()); + } + } + } else { + sqNewSearchCond = SubQueryUtils.andAST(sqNewSearchCond, conjunctAST); + } + } + + if ( sqNewSearchCond != searchCond ) { + if ( sqNewSearchCond == null ) { + sqNewSearchCond = SubQueryUtils.constructTrueCond(); + } + whereClause.setChild(0, sqNewSearchCond); + } + + } + + /* + * called if the SubQuery is Agg and Correlated. + * if SQ doesn't have a GroupBy, it is added to the SQ AST. 
+ */ + private ASTNode getSubQueryGroupByAST() { + ASTNode groupBy = null; + if ( subQueryAST.getChild(1).getChildCount() > 3 && + subQueryAST.getChild(1).getChild(3).getType() == HiveParser.TOK_GROUPBY ) { + groupBy = (ASTNode) subQueryAST.getChild(1).getChild(3); + } + + if ( groupBy != null ) { + return groupBy; + } + + groupBy = SubQueryUtils.buildGroupBy(); + + List newChildren = new ArrayList(); + newChildren.add(groupBy); + if ( subQueryAST.getChildCount() > 3) { + for( int i = subQueryAST.getChildCount() - 1; i >= 3; i-- ) { + ASTNode child = (ASTNode) subQueryAST.getChild(i); + newChildren.add(child); + } + } + + for(ASTNode child : newChildren ) { + subQueryAST.addChild(child); + } + + return groupBy; + } + + + public String getOuterQueryId() { + return outerQueryId; + } + + public JoinType getJoinType() { + return joinType; + } + + public String getAlias() { + return alias; + } + + public ASTNode getJoinConditionAST() { + return joinConditionAST; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 8fae6ea..11fa43a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1758,11 +1758,93 @@ private Operator genHavingPlan(String dest, QB qb, Operator input) } @SuppressWarnings("nls") - private Operator genFilterPlan(String dest, QB qb, Operator input) + private Operator genFilterPlan(String dest, QB qb, Operator input, + Map aliasToOpInfo) throws SemanticException { + OpParseContext inputCtx = opParseCtx.get(input); + RowResolver inputRR = inputCtx.getRowResolver(); ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - return genFilterPlan(qb, (ASTNode) whereExpr.getChild(0), input); + + /* + * Handling of SubQuery Expressions: + * if "Where clause contains no SubQuery expressions" then + * -->[true] ===CONTINUE_FILTER_PROCESSING=== + * else + * 
-->[false] "extract SubQuery expressions\n from Where clause" + * if "this is a nested SubQuery or \nthere are more than 1 SubQuery expressions" then + * -->[yes] "throw Unsupported Error" + * else + * --> "Rewrite Search condition to \nremove SubQuery predicate" + * --> "build QBSubQuery" + * --> "extract correlated predicates \nfrom Where Clause" + * --> "add correlated Items to \nSelect List and Group By" + * --> "construct Join Predicate \nfrom correlation predicates" + * --> "Generate Plan for\n modified SubQuery" + * --> "Build the Join Condition\n for Parent Query to SubQuery join" + * --> "Build the QBJoinTree from the Join condition" + * --> "Update Parent Query Filter\n with any Post Join conditions" + * --> ===CONTINUE_FILTER_PROCESSING=== + * endif + * endif + */ + ASTNode searchCond = (ASTNode) whereExpr.getChild(0); + List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); + + if ( subQueriesInOriginalTree != null ) { + + /* + * Restriction.8.m :: We allow only 1 SubQuery expression per Query. + */ + if (subQueriesInOriginalTree.size() > 1 ) { + + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQueriesInOriginalTree.get(1), "Only 1 SubQuery expression is supported.")); + } + + /* + * Clone the Search AST; apply all rewrites on the clone. 
+ */ + ASTNode clonedSearchCond = (ASTNode) ParseDriver.adaptor.dupTree(searchCond); + List subQueries = SubQueryUtils.findSubQueries(clonedSearchCond); + + for(int i=0; i < subQueries.size(); i++) { + ASTNode subQueryAST = subQueries.get(i); + ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); + + int sqIdx = i+1; + clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); + + QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), + sqIdx, subQueryAST, originalSubQueryAST, ctx); + + subQuery.validateAndRewritePlan(inputRR); + + QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); + Phase1Ctx ctx_1 = initPhase1Ctx(); + doPhase1(subQuery.getSubQueryAST(), qbSQ, ctx_1); + getMetaData(qbSQ); + Operator sqOperator = genPlan(qbSQ); + aliasToOpInfo.put(subQuery.getAlias(), sqOperator); + RowResolver sqRR = opParseCtx.get(sqOperator).getRowResolver(); + + /* + * Gen Join between outer Operator and SQ op + */ + subQuery.buildJoinCondition(inputRR, sqRR); + QBJoinTree joinTree = genSQJoinTree(qb, subQuery, + input, + aliasToOpInfo); + /* + * push filters only for this QBJoinTree. Child QBJoinTrees have already been handled. 
+ */ + pushJoinFilters(qb, joinTree, aliasToOpInfo, false); + input = genJoinOperator(qbSQ, joinTree, aliasToOpInfo, input); + searchCond = subQuery.updateOuterQueryFilter(clonedSearchCond); + } + } + + return genFilterPlan(qb, searchCond, input); } /** @@ -4038,7 +4120,8 @@ private Operator genGroupByPlan1MR(String dest, QB qb, Operator input) } @SuppressWarnings({"nls"}) - private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operator input) + private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operator input, + Map aliasToOpInfo) throws SemanticException { QBParseInfo parseInfo = qb.getParseInfo(); @@ -4118,7 +4201,7 @@ private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operat curr = forwardOp; if (parseInfo.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, forwardOp); + curr = genFilterPlan(dest, qb, forwardOp, aliasToOpInfo); } // Generate GroupbyOperator @@ -6035,17 +6118,21 @@ private Operator genJoinReduceSinkChild(QB qb, QBJoinTree joinTree, } private Operator genJoinOperator(QB qb, QBJoinTree joinTree, - Map map) throws SemanticException { + Map map, + Operator joiningOp) throws SemanticException { QBJoinTree leftChild = joinTree.getJoinSrc(); - Operator joinSrcOp = null; - if (leftChild != null) { - Operator joinOp = genJoinOperator(qb, leftChild, map); + Operator joinSrcOp = joiningOp instanceof JoinOperator ? 
joiningOp : null; + + if (joinSrcOp == null && leftChild != null) { + joinSrcOp = genJoinOperator(qb, leftChild, map, null); + } + + if ( joinSrcOp != null ) { ArrayList filter = joinTree.getFiltersForPushing().get(0); for (ASTNode cond : filter) { - joinOp = genFilterPlan(qb, cond, joinOp); + joinSrcOp = genFilterPlan(qb, cond, joinSrcOp); } - - joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinOp, null, 0); + joinSrcOp = genJoinReduceSinkChild(qb, joinTree, joinSrcOp, null, 0); } Operator[] srcOps = new Operator[joinTree.getBaseSrc().length]; @@ -6240,7 +6327,7 @@ private void genJoinOperatorTypeCheck(Operator left, Operator[] right) private Operator genJoinPlan(QB qb, Map map) throws SemanticException { QBJoinTree joinTree = qb.getQbJoinTree(); - Operator joinOp = genJoinOperator(qb, joinTree, map); + Operator joinOp = genJoinOperator(qb, joinTree, map, null); return joinOp; } @@ -6250,8 +6337,20 @@ private Operator genJoinPlan(QB qb, Map map) */ private void pushJoinFilters(QB qb, QBJoinTree joinTree, Map map) throws SemanticException { - if (joinTree.getJoinSrc() != null) { - pushJoinFilters(qb, joinTree.getJoinSrc(), map); + pushJoinFilters(qb, joinTree, map, true); + } + + /** + * Extract the filters from the join condition and push them on top of the + * source operators. The join source tree is visited first only if 'recursively' is true. + */ + private void pushJoinFilters(QB qb, QBJoinTree joinTree, + Map map, + boolean recursively) throws SemanticException { + if ( recursively ) { + if (joinTree.getJoinSrc() != null) { + pushJoinFilters(qb, joinTree.getJoinSrc(), map); + } } ArrayList> filters = joinTree.getFiltersForPushing(); int pos = 0; @@ -6404,6 +6503,130 @@ private QBJoinTree genUniqueJoinTree(QB qb, ASTNode joinParseTree, return joinTree; } + /* + * Set up a QBJoinTree between a SubQuery and its Parent Query. The Parent Query is the lhs of the Join. + * + * The Parent Query is represented by the last Operator needed to process its From Clause.
In case of a + * single table Query this will be a TableScan, but it can be a Join Operator if the Parent Query contains + * Join clauses, or in case of a single source from clause, the source could be a SubQuery or a PTF invocation. + * + * We set up the QBJoinTree with the above constraints in place. So: + * - the lhs of the QBJoinTree can be another QBJoinTree if the Parent Query operator is a JoinOperator. + * In this case we get its QBJoinTree from the 'joinContext'. + * - the rhs is always a reference to the SubQuery. Its alias is obtained from the QBSubQuery object. + * + * The QBSubQuery also provides the Joining Condition AST. The Joining Condition has been transformed in QBSubQuery setup, + * before this call. The Joining Condition contains any correlated predicates and a predicate for joining the Parent Query expression + * with the SubQuery. + * + * The QBSubQuery also specifies what kind of Join to construct. + * + * Given this information, once we initialize the QBJoinTree, we call the 'parseJoinCondition' method to validate and parse Join conditions.
+ */ + private QBJoinTree genSQJoinTree(QB qb, QBSubQuery subQuery, + Operator joiningOp, + Map aliasToOpInfo) + throws SemanticException { + QBJoinTree joinTree = new QBJoinTree(); + JoinCond[] condn = new JoinCond[1]; + + switch (subQuery.getJoinType()) { + case LEFTOUTER: + joinTree.setNoOuterJoin(false); + condn[0] = new JoinCond(0, 1, JoinType.LEFTOUTER); + break; + case RIGHTOUTER: + joinTree.setNoOuterJoin(false); + condn[0] = new JoinCond(0, 1, JoinType.RIGHTOUTER); + break; + case FULLOUTER: + joinTree.setNoOuterJoin(false); + condn[0] = new JoinCond(0, 1, JoinType.FULLOUTER); + break; + case LEFTSEMI: + joinTree.setNoSemiJoin(false); + condn[0] = new JoinCond(0, 1, JoinType.LEFTSEMI); + break; + default: + condn[0] = new JoinCond(0, 1, JoinType.INNER); + joinTree.setNoOuterJoin(true); + break; + } + joinTree.setJoinCond(condn); + + if ( joiningOp instanceof JoinOperator ) { + QBJoinTree leftTree = joinContext.get(joiningOp); + joinTree.setJoinSrc(leftTree); + String[] leftChildAliases = leftTree.getLeftAliases(); + String leftAliases[] = new String[leftChildAliases.length + 1]; + for (int i = 0; i < leftChildAliases.length; i++) { + leftAliases[i] = leftChildAliases[i]; + } + leftAliases[leftChildAliases.length] = leftTree.getRightAliases()[0]; + joinTree.setLeftAliases(leftAliases); + } else { + String alias = unescapeIdentifier( + SubQueryUtils.getAlias(joiningOp, aliasToOpInfo).toLowerCase()); + joinTree.setLeftAlias(alias); + String[] leftAliases = new String[1]; + leftAliases[0] = alias; + joinTree.setLeftAliases(leftAliases); + String[] children = new String[2]; + children[0] = alias; + joinTree.setBaseSrc(children); + joinTree.setId(qb.getId()); + joinTree.getAliasToOpInfo().put( + getModifiedAlias(qb, alias), aliasToOpInfo.get(alias)); + } + + String rightalias = unescapeIdentifier(subQuery.getAlias().toLowerCase()); + String[] rightAliases = new String[1]; + rightAliases[0] = rightalias; + joinTree.setRightAliases(rightAliases); + String[] 
children = joinTree.getBaseSrc(); + if (children == null) { + children = new String[2]; + } + children[1] = rightalias; + joinTree.setBaseSrc(children); + joinTree.setId(qb.getId()); + joinTree.getAliasToOpInfo().put( + getModifiedAlias(qb, rightalias), aliasToOpInfo.get(rightalias)); + // remember rhs table for semijoin + if (joinTree.getNoSemiJoin() == false) { + joinTree.addRHSSemijoin(rightalias); + } + + ArrayList> expressions = new ArrayList>(); + expressions.add(new ArrayList()); + expressions.add(new ArrayList()); + joinTree.setExpressions(expressions); + + ArrayList nullsafes = new ArrayList(); + joinTree.setNullSafes(nullsafes); + + ArrayList> filters = new ArrayList>(); + filters.add(new ArrayList()); + filters.add(new ArrayList()); + joinTree.setFilters(filters); + joinTree.setFilterMap(new int[2][]); + + ArrayList> filtersForPushing = + new ArrayList>(); + filtersForPushing.add(new ArrayList()); + filtersForPushing.add(new ArrayList()); + joinTree.setFiltersForPushing(filtersForPushing); + + ASTNode joinCond = subQuery.getJoinConditionAST(); + ArrayList leftSrc = new ArrayList(); + parseJoinCondition(joinTree, joinCond, leftSrc); + if (leftSrc.size() == 1) { + joinTree.setLeftAlias(leftSrc.get(0)); + } + + return joinTree; + } + private QBJoinTree genJoinTree(QB qb, ASTNode joinParseTree, Map aliasToOpInfo) throws SemanticException { @@ -7194,7 +7417,7 @@ private boolean distinctExprsExists(QB qb) { } @SuppressWarnings("nls") - private Operator genBodyPlan(QB qb, Operator input) throws SemanticException { + private Operator genBodyPlan(QB qb, Operator input, Map aliasToOpInfo) throws SemanticException { QBParseInfo qbp = qb.getParseInfo(); TreeSet ks = new TreeSet(qbp.getClauseNames()); @@ -7290,7 +7513,7 @@ private Operator genBodyPlan(QB qb, Operator input) throws SemanticException { curr = inputs.get(dest); if (qbp.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, curr); + curr = genFilterPlan(dest, qb, curr, aliasToOpInfo); } if 
(qbp.getAggregationExprsForClause(dest).size() != 0 @@ -7320,7 +7543,7 @@ private Operator genBodyPlan(QB qb, Operator input) throws SemanticException { curr = genPostGroupByBodyPlan(curr, dest, qb); } } else { - curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input); + curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo); } } } @@ -8120,7 +8343,7 @@ public Operator genPlan(QB qb) throws SemanticException { srcOpInfo = lastPTFOp != null ? lastPTFOp : srcOpInfo; } - Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo); + Operator bodyOpInfo = genBodyPlan(qb, srcOpInfo, aliasToOpInfo); if (LOG.isDebugEnabled()) { LOG.debug("Created Plan for Query Block " + qb.getId()); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java new file mode 100644 index 0000000..cfb4e21 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -0,0 +1,408 @@ +package org.apache.hadoop.hive.ql.parse; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.antlr.runtime.tree.TreeWizard; +import org.antlr.runtime.tree.TreeWizard.ContextVisitor; +import org.apache.hadoop.hive.ql.Context; +import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryOperator; +import org.apache.hadoop.hive.ql.parse.QBSubQuery.SubQueryOperatorType; + +public class SubQueryUtils { + + static void extractConjuncts(ASTNode node, List conjuncts) { + if (node.getType() != HiveParser.KW_AND ) { + conjuncts.add(node); + return; + } + extractConjuncts((ASTNode)node.getChild(0), conjuncts); + extractConjuncts((ASTNode)node.getChild(1), conjuncts); + } + + /* + * Remove the SubQuery from the Where CLause Tree. 
+ * return the remaining WhereClause. + */ + static ASTNode rewriteParentQueryWhere(ASTNode whereCond, ASTNode subQuery) throws SemanticException { + ParentQueryWhereClauseRewrite rewrite = new ParentQueryWhereClauseRewrite(whereCond, subQuery); + return rewrite.remove(); + } + + static ASTNode constructTrueCond() { + ASTNode eq = (ASTNode) ParseDriver.adaptor.create(HiveParser.EQUAL, "="); + ASTNode lhs = (ASTNode) ParseDriver.adaptor.create(HiveParser.Number, "1"); + ASTNode rhs = (ASTNode) ParseDriver.adaptor.create(HiveParser.Number, "1"); + ParseDriver.adaptor.addChild(eq, lhs); + ParseDriver.adaptor.addChild(eq, rhs); + return eq; + } + + static ASTNode andAST(ASTNode left, ASTNode right) { + if ( left == null ) { + return right; + } else if ( right == null ) { + return left; + } else { + Object o = ParseDriver.adaptor.create(HiveParser.KW_AND, "AND"); + ParseDriver.adaptor.addChild(o, left); + ParseDriver.adaptor.addChild(o, right); + return (ASTNode) o; + } + } + + static ASTNode orAST(ASTNode left, ASTNode right) { + if ( left == null ) { + return right; + } else if ( right == null ) { + return left; + } else { + Object o = ParseDriver.adaptor.create(HiveParser.KW_OR, "OR"); + ParseDriver.adaptor.addChild(o, left); + ParseDriver.adaptor.addChild(o, right); + return (ASTNode) o; + } + } + + static ASTNode isNull(ASTNode expr) { + ASTNode node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION"); + node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_ISNULL, "TOK_ISNULL")); + node.addChild(expr); + return node; + } + + + /* + * Check that SubQuery is a top level conjuncts. + * Remove it from the Where Clause AST. 
+ */ + static class ParentQueryWhereClauseRewrite { + ASTNode root; + ASTNode subQuery; + + ParentQueryWhereClauseRewrite(ASTNode root, ASTNode subQuery) { + this.root = root; + this.subQuery = subQuery; + } + + ASTNode getParentInWhereClause(ASTNode node) { + if (node == null || node == root) { + return null; + } + return (ASTNode) node.getParent(); + } + + boolean removeSubQuery(ASTNode node) { + if (node.getType() == HiveParser.KW_AND) { + boolean r = removeSubQuery((ASTNode) node.getChild(0)); + if (!r) { + r = removeSubQuery((ASTNode) node.getChild(1)); + } + return r; + } else if (node.getType() == HiveParser.KW_NOT) { + ASTNode child = (ASTNode) node.getChild(0); + if (child == subQuery) { + ASTNode sqOpType = (ASTNode) subQuery.getChild(0).getChild(0); + if (sqOpType.getType() == HiveParser.KW_EXISTS) { + sqOpType.getToken().setType(HiveParser.TOK_SUBQUERY_OP_NOTEXISTS); + ASTNode parent = getParentInWhereClause(node); + if (parent == null) { + root = subQuery; + } else { + int nodeIdx = node.getChildIndex(); + parent.setChild(nodeIdx, subQuery); + } + return removeSubQuery(subQuery); + } + } + return false; + } else if (node == subQuery) { + ASTNode parent = getParentInWhereClause(node); + ASTNode gParent = getParentInWhereClause(parent); + ASTNode sibling = null; + + if (parent != null) { + if (subQuery.getChildIndex() == 0) { + sibling = (ASTNode) parent.getChild(1); + } else { + sibling = (ASTNode) parent.getChild(0); + } + } + + /* + * SubQuery was only condition in where clause + */ + if (sibling == null) { + root = constructTrueCond(); + } // SubQuery was just one conjunct + else if (gParent == null) { + root = sibling; + } else { + // otherwise replace parent by sibling. 
+ int pIdx = parent.getChildIndex(); + gParent.setChild(pIdx, sibling); + } + return true; + } else { + return false; + } + } + + ASTNode remove() throws SemanticException { + boolean r = removeSubQuery(root); + if (r) { + return root; + } + /* + * Restriction.7.h :: SubQuery predicates can appear only as top level conjuncts. + */ + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + subQuery, "Only SubQuery expressions that are top level conjuncts are allowed")); + } + } + + static List findSubQueries(ASTNode node) + throws SemanticException { + TreeWizard tw = new TreeWizard(ParseDriver.adaptor, HiveParser.tokenNames); + SubQueryVisitor visitor = new SubQueryVisitor(); + tw.visit(node, HiveParser.TOK_SUBQUERY_EXPR, visitor); + return visitor.getSubQueries(); + } + + static class SubQueryVisitor implements ContextVisitor { + String errMsg; + boolean throwError = false; + ASTNode errorNode; + List subQueries; + + @SuppressWarnings("rawtypes") + @Override + public void visit(Object t, Object parent, int childIndex, Map labels) { + if (subQueries == null ) { + subQueries = new ArrayList(); + } + subQueries.add((ASTNode)t); + } + + public List getSubQueries() { + return subQueries; + } + + } + + static QBSubQuery buildSubQuery(String outerQueryId, + int sqIdx, + ASTNode sqAST, + ASTNode originalSQAST, + Context ctx) throws SemanticException { + ASTNode sqOp = (ASTNode) sqAST.getChild(0); + ASTNode sq = (ASTNode) sqAST.getChild(1); + ASTNode outerQueryExpr = (ASTNode) sqAST.getChild(2); + return new QBSubQuery(outerQueryId, sqIdx, sq, outerQueryExpr, + buildSQOperator(sqOp), + originalSQAST, + ctx); + } + + static SubQueryOperator buildSQOperator(ASTNode astSQOp) throws SemanticException { + ASTNode opAST = (ASTNode) astSQOp.getChild(0); + SubQueryOperatorType type = SubQueryOperatorType.get(opAST); + return new SubQueryOperator(opAST, type); + } + + /* + * is this expr a UDAF invocation; does it imply windowing + * @return + * 0 if implies 
neither + * 1 if implies aggregation + * 2 if implies windowing + */ + static int checkAggOrWindowing(ASTNode expressionTree) throws SemanticException { + int exprTokenType = expressionTree.getToken().getType(); + if (exprTokenType == HiveParser.TOK_FUNCTION + || exprTokenType == HiveParser.TOK_FUNCTIONDI + || exprTokenType == HiveParser.TOK_FUNCTIONSTAR) { + assert (expressionTree.getChildCount() != 0); + if (expressionTree.getChild(expressionTree.getChildCount()-1).getType() + == HiveParser.TOK_WINDOWSPEC) { + return 2; + } + if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { + String functionName = SemanticAnalyzer.unescapeIdentifier(expressionTree.getChild(0) + .getText()); + if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { + return 1; + } + } + } + int r = 0; + for (int i = 0; i < expressionTree.getChildCount(); i++) { + int c = checkAggOrWindowing((ASTNode) expressionTree.getChild(i)); + r = Math.max(r, c); + } + return r; + } + + static List getTableAliasesInSubQuery(QBSubQuery sq) { + List aliases = new ArrayList(); + ASTNode joinAST = (ASTNode) sq.getSubQueryAST().getChild(0); + getTableAliasesInSubQuery((ASTNode) joinAST.getChild(0), aliases); + return aliases; + } + + private static void getTableAliasesInSubQuery(ASTNode joinNode, List aliases) { + + if ((joinNode.getToken().getType() == HiveParser.TOK_TABREF) + || (joinNode.getToken().getType() == HiveParser.TOK_SUBQUERY) + || (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION)) { + String tableName = SemanticAnalyzer.getUnescapedUnqualifiedTableName((ASTNode) joinNode.getChild(0)) + .toLowerCase(); + String alias = joinNode.getChildCount() == 1 ? tableName + : SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(joinNode.getChildCount() - 1) + .getText().toLowerCase()); + alias = (joinNode.getToken().getType() == HiveParser.TOK_PTBLFUNCTION) ? 
+ SemanticAnalyzer.unescapeIdentifier(joinNode.getChild(1).getText().toLowerCase()) : + alias; + aliases.add(alias); + } else { + ASTNode left = (ASTNode) joinNode.getChild(0); + ASTNode right = (ASTNode) joinNode.getChild(1); + getTableAliasesInSubQuery(left, aliases); + getTableAliasesInSubQuery(right, aliases); + } + } + + /* + * construct the ASTNode for the SQ column that will join with the OuterQuery Expression. + * So for 'select ... from R1 where A in (select B from R2...)' + * this will build (= outerQueryExpr 'ast returned by call to buildSQJoinExpr') + */ + static ASTNode buildOuterQryToSQJoinCond(ASTNode outerQueryExpr, + String sqAlias, + RowResolver sqRR) { + ASTNode node = (ASTNode) ParseDriver.adaptor.create(HiveParser.EQUAL, "="); + node.addChild(outerQueryExpr); + node.addChild(buildSQJoinExpr(sqAlias, sqRR, false)); + return node; + } + + /* + * construct the ASTNode for the SQ column that will join with the OuterQuery Expression. + * So for 'select ... from R1 where A in (select B from R2...)' + * this will build (. (TOK_TABLE_OR_COL Identifier[SQ_1]) Identifier[B]) + * where 'SQ_1' is the alias generated for the SubQuery. + */ + static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR, + boolean useInternalName) { + + List signature = sqRR.getRowSchema().getSignature(); + ColumnInfo joinColumn = signature.get(0); + String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName()); + return createColRefAST(sqAlias, useInternalName ? 
joinColumn.getInternalName() : joinColName[1]); + } + + static ASTNode buildOuterJoinPostCond(String sqAlias, RowResolver sqRR) { + return isNull(buildSQJoinExpr(sqAlias, sqRR, false)); + } + + @SuppressWarnings("rawtypes") + static String getAlias(Operator o, Map aliasToOpInfo) { + for(Map.Entry e : aliasToOpInfo.entrySet()) { + if ( e.getValue() == o) { + return e.getKey(); + } + } + return null; + } + + static ASTNode createColRefAST(String tabAlias, String colName) { + ASTNode dot = (ASTNode) ParseDriver.adaptor.create(HiveParser.DOT, "."); + ASTNode tabAst = createTabRefAST(tabAlias); + ASTNode colAst = (ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, colName); + dot.addChild(tabAst); + dot.addChild(colAst); + return dot; + } + + static ASTNode createAliasAST(String colName) { + return (ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, colName); + } + + static ASTNode createTabRefAST(String tabAlias) { + ASTNode tabAst = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_TABLE_OR_COL, "TOK_TABLE_OR_COL"); + ASTNode tabName = (ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, tabAlias); + tabAst.addChild(tabName); + return tabAst; + } + + static ASTNode buildSelectExpr(ASTNode expression) { + ASTNode selAst = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"); + selAst.addChild(expression); + return selAst; + } + + static ASTNode buildGroupBy() { + ASTNode gBy = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_GROUPBY, "TOK_GROUPBY"); + return gBy; + } + + static ASTNode createSelectItem(ASTNode expr, ASTNode alias) { + ASTNode selectItem = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_SELEXPR, "TOK_SELEXPR"); + selectItem.addChild(expr); + selectItem.addChild(alias); + return selectItem; + } + + static ASTNode alterCorrelatedPredicate(ASTNode correlatedExpr, ASTNode sqAlias, boolean left) { + if ( left ) { + correlatedExpr.setChild(0, sqAlias); + } else { + correlatedExpr.setChild(1, sqAlias); + } 
+ return correlatedExpr; + } + + static void addGroupExpressionToFront(ASTNode gBy, ASTNode expr) { + ASTNode grpExpr = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_GROUPING_SETS_EXPRESSION, "TOK_GROUPING_SETS_EXPRESSION"); + grpExpr.addChild(expr); + List newChildren = new ArrayList(); + newChildren.add(expr); + int i = gBy.getChildCount() - 1; + while ( i >= 0 ) { + newChildren.add((ASTNode) gBy.deleteChild(i)); + i--; + } + for(ASTNode child : newChildren ) { + gBy.addChild(child); + } + } + + static ASTNode buildPostJoinNullCheck(List subQueryJoinAliasExprs) { + ASTNode check = null; + for(ASTNode expr : subQueryJoinAliasExprs) { + check = orAST(check, isNull(expr)); + } + return check; + } + + static void setOriginDeep(ASTNode node, ASTNodeOrigin origin) { + if ( node == null ) { + return; + } + node.setOrigin(origin); + int childCnt = node.getChildCount(); + for(int i=0; i stack, NodeProcessorCtx procCtx, public static DefaultExprProcessor getDefaultExprProcessor() { return new DefaultExprProcessor(); } + + /** + * Processor for subquery expressions.. + */ + public static class SubQueryExprProcessor implements NodeProcessor { + + @Override + public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, + Object... nodeOutputs) throws SemanticException { + + TypeCheckCtx ctx = (TypeCheckCtx) procCtx; + if (ctx.getError() != null) { + return null; + } + + ExprNodeDesc desc = TypeCheckProcFactory.processGByExpr(nd, procCtx); + if (desc != null) { + return desc; + } + + ASTNode expr = (ASTNode) nd; + ASTNode sqNode = (ASTNode) expr.getParent().getChild(1); + /* + * Restriction.1.h :: SubQueries only supported in the SQL Where Clause. + */ + ctx.setError(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg(sqNode, + "Currently SubQuery expressions are only allowed as Where Clause predicates"), + sqNode); + return null; + } + } + + /** + * Factory method to get SubQueryExprProcessor. + * + * @return DateExprProcessor. 
+ */ + public static SubQueryExprProcessor getSubQueryExprProcessor() { + return new SubQueryExprProcessor(); + } } diff --git ql/src/test/queries/clientnegative/subquery_in_groupby.q ql/src/test/queries/clientnegative/subquery_in_groupby.q new file mode 100644 index 0000000..a9bc6ee --- /dev/null +++ ql/src/test/queries/clientnegative/subquery_in_groupby.q @@ -0,0 +1,5 @@ + + +select count(*) +from src +group by src.key in (select key from src s1 where s1.key > '9') \ No newline at end of file diff --git ql/src/test/queries/clientnegative/subquery_in_select.q ql/src/test/queries/clientnegative/subquery_in_select.q new file mode 100644 index 0000000..1365389 --- /dev/null +++ ql/src/test/queries/clientnegative/subquery_in_select.q @@ -0,0 +1,6 @@ + + + +select src.key in (select key from src s1 where s1.key > '9') +from src +; \ No newline at end of file diff --git ql/src/test/queries/clientnegative/subquery_windowing_corr.q ql/src/test/queries/clientnegative/subquery_windowing_corr.q new file mode 100644 index 0000000..bcc767b --- /dev/null +++ ql/src/test/queries/clientnegative/subquery_windowing_corr.q @@ -0,0 +1,26 @@ +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part; + + +-- corr and windowing +select p_mfgr, p_name, p_size +from part a +where a.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) + from part b + where a.p_brand = b.p_brand) +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_exists.q ql/src/test/queries/clientpositive/subquery_exists.q new file mode 100644 index 0000000..f812e36 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_exists.q @@ -0,0 +1,45 @@ + + +-- no agg, corr +explain +select * +from src b +where exists 
+ (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +; + +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +; + +-- view test +create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +; + +select * from cv1 +; + +-- sq in from +select * +from (select * + from src b + where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_in.q ql/src/test/queries/clientpositive/subquery_in.q new file mode 100644 index 0000000..6f83a88 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_in.q @@ -0,0 +1,155 @@ +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part; + +DROP TABLE lineitem; +CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem; + +-- non agg, non corr +explain + select * +from src +where src.key in (select key from src s1 where s1.key > '9') +; + +select * +from src +where src.key in (select key from src s1 where s1.key > '9') +; + +-- non agg, corr +explain +select * +from src b +where b.key in + (select a.key + from 
src a + where b.value = a.value and a.key > '9' + ) +; + +select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +; + +-- agg, non corr +explain +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +; +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +; + +-- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +; + +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +; + +-- distinct, corr +explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +; + +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +; + +-- non agg, non corr, windowing +select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +; + +-- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +; + +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey 
+where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +; + +-- non agg, corr, with join in Parent Query +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +; diff --git ql/src/test/queries/clientpositive/subquery_multiinsert.q ql/src/test/queries/clientpositive/subquery_multiinsert.q new file mode 100644 index 0000000..1f65b16 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_multiinsert.q @@ -0,0 +1,45 @@ +CREATE TABLE src_4( + key STRING, + value STRING +) +; + +CREATE TABLE src_5( + key STRING, + value STRING +) +; + +explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +; + +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +; + +select * from src_4 +; +select * from src_5 +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_notexists.q ql/src/test/queries/clientpositive/subquery_notexists.q new file mode 100644 index 0000000..43a801f --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_notexists.q @@ -0,0 +1,41 @@ + + +-- no agg, corr +explain +select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_2' + ) +; + +select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.key = 
b.key and a.value > 'val_2' + ) +; + +-- distinct, corr +explain +select * +from src b +where not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_2' + ) +; + +select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.value > 'val_2' + ) +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_notin.q ql/src/test/queries/clientpositive/subquery_notin.q new file mode 100644 index 0000000..bd168c4 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_notin.q @@ -0,0 +1,122 @@ +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part; + +DROP TABLE lineitem; +CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|'; + +LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem; + +-- non agg, non corr +explain +select * +from src +where src.key not in + ( select key from src s1 + where s1.key > '2' + ) +; + +select * +from src +where src.key not in ( select key from src s1 where s1.key > '2') +order by key +; + +-- non agg, corr +explain +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +; + +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + 
(select p_name + from (select p_mfgr, p_name, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +order by p_mfgr, b.p_name +; + +-- agg, non corr +explain +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +; +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +order by p_name, p_size +; + +-- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +; + +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +order by p_mfgr, p_size +; + +-- non agg, non corr, Group By in Parent Query +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +; diff --git ql/src/test/results/clientnegative/subquery_in_groupby.q.out ql/src/test/results/clientnegative/subquery_in_groupby.q.out new file mode 100644 index 0000000..809bb0a --- /dev/null +++ ql/src/test/results/clientnegative/subquery_in_groupby.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10249]: Line 5:37 Unsupported SubQuery Expression ''9'': Currently SubQuery expressions are only allowed as Where Clause predicates diff --git ql/src/test/results/clientnegative/subquery_in_select.q.out ql/src/test/results/clientnegative/subquery_in_select.q.out new file mode 100644 index 
0000000..3d74132 --- /dev/null +++ ql/src/test/results/clientnegative/subquery_in_select.q.out @@ -0,0 +1 @@ +FAILED: SemanticException [Error 10249]: Line 4:35 Unsupported SubQuery Expression ''9'': Currently SubQuery expressions are only allowed as Where Clause predicates diff --git ql/src/test/results/clientnegative/subquery_windowing_corr.q.out ql/src/test/results/clientnegative/subquery_windowing_corr.q.out new file mode 100644 index 0000000..c7de7b2 --- /dev/null +++ ql/src/test/results/clientnegative/subquery_windowing_corr.q.out @@ -0,0 +1,43 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +FAILED: SemanticException Line 9:8 Unsupported SubQuery Expression '1' in definition of SubQuery sq_1 [ +a.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) + from part b + where a.p_brand = b.p_brand) +] used as sq_1 at Line 7:15: Correlated Sub Queries cannot contain Windowing clauses. 
diff --git ql/src/test/results/clientpositive/subquery_exists.q.out ql/src/test/results/clientpositive/subquery_exists.q.out new file mode 100644 index 0000000..f55afd8 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_exists.q.out @@ -0,0 +1,226 @@ +PREHOOK: query: -- no agg, corr +explain +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- no agg, corr +explain +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (> (. 
(TOK_TABLE_OR_COL a) value) 'val_9'))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: value + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: value + type: string + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_9') + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: _col1, _col2 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + expr: _col2 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +PREHOOK: 
Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- view test +create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +PREHOOK: type: CREATEVIEW +POSTHOOK: query: -- view test +create view cv1 as +select * +from src b +where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@cv1 +PREHOOK: query: select * from cv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@cv1 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from cv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cv1 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- sq in from +select * +from (select * + from src b + where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- sq in from +select * +from (select * + from src b + where exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_9') + ) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 diff --git 
ql/src/test/results/clientpositive/subquery_in.q.out ql/src/test/results/clientpositive/subquery_in.q.out new file mode 100644 index 0000000..32a248a --- /dev/null +++ ql/src/test/results/clientpositive/subquery_in.q.out @@ -0,0 +1,1267 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS 
STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@lineitem +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +PREHOOK: type: LOAD +PREHOOK: Output: default@lineitem +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +POSTHOOK: type: LOAD +POSTHOOK: Output: default@lineitem +PREHOOK: query: -- non agg, non corr +explain + select * +from src +where src.key in (select key from src s1 where s1.key > '9') +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +explain + select * +from src +where src.key in (select key from src s1 where s1.key > '9') +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')))) (. 
(TOK_TABLE_OR_COL src) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + src + TableScan + alias: src + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src +where src.key in (select key from src s1 where s1.key > '9') +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src +where src.key in (select key from src s1 where s1.key > '9') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 
val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- non agg, corr +explain +select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain +select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) key) '9'))))) (. (TOK_TABLE_OR_COL b) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + expr: value + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: value + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce 
partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + +PREHOOK: query: select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- agg, non corr +explain +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT 
(TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (<= (TOK_TABLE_OR_COL r) 2)))) (. (TOK_TABLE_OR_COL part) p_size))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col5 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: avg(_col0) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort 
order: + tag: -1 + value expressions: + expr: _col0 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: avg(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: double + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: double + sort order: + + Map-reduce partition columns: + expr: _col0 + type: double + tag: 1 + part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: UDFToDouble(p_size) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(p_size) + type: double + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col5} + 1 + handleSkewJoin: false + outputColumnNames: _col1, _col5 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p_name, p_size +from +part 
where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name, p_size +from +part where part.p_size in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique salmon chartreuse burlywood 6 +almond antique medium spring khaki 6 +PREHOOK: query: -- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. (TOK_TABLE_OR_COL b) p_size))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col2 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: min(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: int + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: 
_col1 + type: int + expr: _col0 + type: string + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: int + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + expr: _col1 + type: string + tag: 1 + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: p_size + type: int + expr: p_mfgr + type: string + sort order: ++ + Map-reduce partition columns: + expr: p_size + type: int + expr: p_mfgr + type: string + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} + 1 + handleSkewJoin: false + outputColumnNames: _col1, _col2, _col5 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col2 + type: string + expr: _col1 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p_mfgr, p_name, p_size +from part b where b.p_size in + 
(select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from part b where b.p_size in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#3 almond antique misty red olive 1 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 +Manufacturer#5 almond antique sky peru orange 2 +Manufacturer#4 almond aquamarine yellow dodger mint 7 +PREHOOK: query: -- distinct, corr +explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- distinct, corr +explain +select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) key) '9'))))) (. 
(TOK_TABLE_OR_COL b) key))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 1 + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + expr: value + type: string + sort 
order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: value + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where b.key in + (select distinct a.key + from src a + where b.value = a.value and a.key > '9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: -- non agg, non corr, windowing +select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: -- non agg, non corr, windowing +select p_mfgr, p_name, p_size +from part +where part.p_size in + (select first_value(p_size) over(partition by p_mfgr order by p_size) from part) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part 
+#### A masked pattern was here #### +Manufacturer#3 almond antique misty red olive 1 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#1 almond antique burnished rose metallic 2 +Manufacturer#5 almond antique sky peru orange 2 +Manufacturer#2 almond aquamarine midnight light salmon 2 +Manufacturer#4 almond aquamarine yellow dodger mint 7 +PREHOOK: query: -- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr, with join in Parent Query +explain +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (TOK_TABLE_OR_COL l_partkey) p_partkey)))) p) (TOK_TABREF (TOK_TABNAME lineitem) li) (= (. (TOK_TABLE_OR_COL p) p_partkey) (. (TOK_TABLE_OR_COL li) l_partkey)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL p) p_partkey)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL li) l_suppkey))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL li) l_linenumber) 1) (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME lineitem))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL l_orderkey))) (TOK_WHERE (= (TOK_TABLE_OR_COL l_shipmode) 'AIR')))) (. 
(TOK_TABLE_OR_COL li) l_orderkey)))))) + +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + p:lineitem + TableScan + alias: lineitem + Select Operator + expressions: + expr: l_partkey + type: int + outputColumnNames: l_partkey + Group By Operator + bucketGroup: false + keys: + expr: l_partkey + type: int + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: int + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: int + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 0 + value expressions: + expr: _col0 + type: int + li + TableScan + alias: li + Filter Operator + predicate: + expr: (l_linenumber = 1) + type: boolean + Reduce Output Operator + key expressions: + expr: l_partkey + type: int + sort order: + + Map-reduce partition columns: + expr: l_partkey + type: int + tag: 1 + value expressions: + expr: l_orderkey + type: int + expr: l_suppkey + type: int + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} {VALUE._col2} + handleSkewJoin: false + 
outputColumnNames: _col0, _col1, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: int + sort order: + + Map-reduce partition columns: + expr: _col1 + type: int + tag: 0 + value expressions: + expr: _col3 + type: int + expr: _col0 + type: int + sq_1:lineitem + TableScan + alias: lineitem + Filter Operator + predicate: + expr: (l_shipmode = 'AIR') + type: boolean + Select Operator + expressions: + expr: l_orderkey + type: int + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: int + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: int + sort order: + + Map-reduce partition columns: + expr: _col0 + type: int + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col2} {VALUE._col18} + 1 + handleSkewJoin: false + outputColumnNames: _col2, _col18 + Select Operator + expressions: + expr: _col18 + type: int + expr: _col2 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where 
l_shipmode = 'AIR') +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +155190 7706 +4297 1798 +108570 8571 +82704 7721 +61336 8855 +2320 9821 +115118 7630 +115209 7721 +64128 9141 +40216 217 +PREHOOK: query: -- non agg, corr, with join in Parent Query +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: -- non agg, corr, with join in Parent Query +select p.p_partkey, li.l_suppkey +from (select distinct l_partkey as p_partkey from lineitem) p join lineitem li on p.p_partkey = li.l_partkey +where li.l_linenumber = 1 and + li.l_orderkey in (select l_orderkey from lineitem where l_shipmode = 'AIR' and l_linenumber = li.l_linenumber) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +4297 1798 +108570 8571 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out new file mode 100644 index 0000000..a0e7b13 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -0,0 +1,474 @@ +PREHOOK: query: CREATE TABLE src_4( + key STRING, + value STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE src_4( + key STRING, + value STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: 
Output: default@src_4 +PREHOOK: query: CREATE TABLE src_5( + key STRING, + value STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE src_5( + key STRING, + value STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@src_5 +PREHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_4))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) key) '9'))))) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME src_5))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP TOK_SUBQUERY_OP_NOTIN) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '2')))) (. 
(TOK_TABLE_OR_COL b) key))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: key + type: string + expr: value + type: string + sort order: ++ + Map-reduce partition columns: + expr: key + type: string + expr: value + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + 
outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_4 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '2') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((1 = 1) and _col4 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_5 + + Stage: Stage-6 + Stats-Aggr Operator + + +PREHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_4 +PREHOOK: Output: default@src_5 +POSTHOOK: query: from src b +INSERT OVERWRITE TABLE src_4 + select * + where b.key in + (select a.key + from src a + where b.value = a.value and a.key > '9' + ) +INSERT OVERWRITE TABLE src_5 + select * + where b.key not in ( select key from src s1 where s1.key > '2') + order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_4 +POSTHOOK: Output: default@src_5 +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_4 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_4 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_4 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +90 val_90 +90 val_90 +90 val_90 +92 val_92 +95 val_95 +95 val_95 +96 val_96 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +PREHOOK: query: select * from src_5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_5 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_5 +#### A masked pattern was here #### +POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 
val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 diff --git ql/src/test/results/clientpositive/subquery_notexists.q.out ql/src/test/results/clientpositive/subquery_notexists.q.out new file mode 100644 index 0000000..29d13c1 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notexists.q.out @@ -0,0 +1,551 @@ +PREHOOK: query: -- no agg, corr +explain +select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- no agg, corr +explain +select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_2' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) 
(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (> (. (TOK_TABLE_OR_COL a) value) 'val_2')))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: value + type: string + expr: key + type: string + sort order: ++ + Map-reduce partition columns: + expr: value + type: string + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_2') + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + expr: _col2 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col1 + type: string + expr: _col2 + type: string + tag: 1 + value expressions: + expr: _col2 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col2} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col6 + Filter Operator + predicate: + expr: ((1 = 1) and _col6 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_2' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_2' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 
val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +PREHOOK: query: -- distinct, corr +explain +select * +from src b +where not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- distinct, corr +explain +select * +from src b +where not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_2' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. 
(TOK_TABLE_OR_COL a) value) 'val_2')))))))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_2') + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 1 + value expressions: + expr: _col1 + type: string + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition 
map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col5 + Filter Operator + predicate: + expr: ((1 = 1) and _col5 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.value > 'val_2' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +where not exists + (select a.key + from src a + where b.value = a.value and a.value > 'val_2' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 
val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 diff --git ql/src/test/results/clientpositive/subquery_notin.q.out ql/src/test/results/clientpositive/subquery_notin.q.out new file mode 100644 index 0000000..90faab2 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notin.q.out @@ -0,0 +1,1012 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: DROP TABLE lineitem +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE lineitem +POSTHOOK: type: DROPTABLE 
+PREHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE lineitem (L_ORDERKEY INT, + L_PARTKEY INT, + L_SUPPKEY INT, + L_LINENUMBER INT, + L_QUANTITY DOUBLE, + L_EXTENDEDPRICE DOUBLE, + L_DISCOUNT DOUBLE, + L_TAX DOUBLE, + L_RETURNFLAG STRING, + L_LINESTATUS STRING, + l_shipdate STRING, + L_COMMITDATE STRING, + L_RECEIPTDATE STRING, + L_SHIPINSTRUCT STRING, + L_SHIPMODE STRING, + L_COMMENT STRING) +ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@lineitem +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +PREHOOK: type: LOAD +PREHOOK: Output: default@lineitem +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/lineitem.txt' OVERWRITE INTO TABLE lineitem +POSTHOOK: type: LOAD +POSTHOOK: Output: default@lineitem +PREHOOK: query: -- non agg, non corr +explain +select * +from src +where src.key not in + ( select key from src s1 + where s1.key > '2' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +explain +select * +from src +where src.key not in + ( select key from src s1 + where s1.key > '2' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP TOK_SUBQUERY_OP_NOTIN) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. 
(TOK_TABLE_OR_COL s1) key) '2')))) (. (TOK_TABLE_OR_COL src) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '2') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + src + TableScan + alias: src + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((1 = 1) and _col4 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src +where src.key not in ( select key from src s1 where s1.key > '2') +order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src +where src.key not in ( select key from src s1 where s1.key > '2') +order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +PREHOOK: query: -- non agg, corr +explain +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain +select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from 
(select p_mfgr, p_name, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP TOK_SUBQUERY_OP_NOTIN) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_name))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. 
(TOK_TABLE_OR_COL b) p_name))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + sort order: ++ + Map-reduce partition columns: + expr: p_name + type: string + expr: p_mfgr + type: string + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} + 1 {VALUE._col0} + handleSkewJoin: false + 
outputColumnNames: _col1, _col2, _col5, _col11 + Filter Operator + predicate: + expr: ((1 = 1) and _col11 is null) + type: boolean + Select Operator + expressions: + expr: _col2 + type: string + expr: _col1 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +order by p_mfgr, b.p_name +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, b.p_name, p_size +from part b +where b.p_name not in + (select p_name + from (select p_mfgr, p_name, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +order by p_mfgr, b.p_name +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond antique violet turquoise frosted 40 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#4 almond 
aquamarine floral ivory bisque 27 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#5 almond aquamarine dodger light gainsboro 46 +Manufacturer#5 almond azure blanched chiffon midnight 23 +PREHOOK: query: -- agg, non corr +explain +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP TOK_SUBQUERY_OP_NOTIN) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (<= (TOK_TABLE_OR_COL r) 2)))) (. 
(TOK_TABLE_OR_COL part) p_size))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col5 + type: int + outputColumnNames: _col0 + Group By Operator + aggregations: + expr: avg(_col0) + bucketGroup: false + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + sort order: + tag: -1 + value expressions: + expr: _col0 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: avg(VALUE._col0) + bucketGroup: false + mode: mergepartial + outputColumnNames: _col0 + Select Operator + expressions: + expr: _col0 + type: double + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output 
Operator + key expressions: + expr: _col0 + type: double + sort order: + + Map-reduce partition columns: + expr: _col0 + type: double + tag: 1 + value expressions: + expr: _col0 + type: double + part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: UDFToDouble(p_size) + type: double + sort order: + + Map-reduce partition columns: + expr: UDFToDouble(p_size) + type: double + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col5} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col1, _col5, _col11 + Filter Operator + predicate: + expr: ((1 = 1) and _col11 is null) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +order by p_name, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_name, p_size +from +part where part.p_size not in + (select avg(p_size) + from (select p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 + ) +order by p_name, p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +almond antique blue firebrick mint 31 +almond antique burnished rose metallic 2 +almond antique burnished rose 
metallic 2 +almond antique chartreuse khaki white 17 +almond antique chartreuse lavender yellow 34 +almond antique forest lavender goldenrod 14 +almond antique gainsboro frosted violet 10 +almond antique metallic orange dim 19 +almond antique misty red olive 1 +almond antique olive coral navajo 45 +almond antique sky peru orange 2 +almond antique violet chocolate turquoise 14 +almond antique violet mint lemon 39 +almond antique violet turquoise frosted 40 +almond aquamarine burnished black steel 28 +almond aquamarine dodger light gainsboro 46 +almond aquamarine floral ivory bisque 27 +almond aquamarine midnight light salmon 2 +almond aquamarine pink moccasin thistle 42 +almond aquamarine rose maroon antique 25 +almond aquamarine sandy cyan gainsboro 18 +almond aquamarine yellow dodger mint 7 +almond azure aquamarine papaya violet 12 +almond azure blanched chiffon midnight 23 +PREHOOK: query: -- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, corr +explain +select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_name)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size))) (TOK_WHERE (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP TOK_SUBQUERY_OP_NOTIN) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL 
p_mfgr)) (TOK_SELEXPR (TOK_TABLE_OR_COL p_size)) (TOK_SELEXPR (TOK_FUNCTION rank (TOK_WINDOWSPEC (TOK_PARTITIONINGSPEC (TOK_DISTRIBUTEBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL p_size)))))) r)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size)))) (TOK_WHERE (and (<= (TOK_TABLE_OR_COL r) 2) (= (. (TOK_TABLE_OR_COL b) p_mfgr) (. (TOK_TABLE_OR_COL a) p_mfgr)))))) (. (TOK_TABLE_OR_COL b) p_size))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-3 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a:part + TableScan + alias: part + Reduce Output Operator + key expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + sort order: ++ + Map-reduce partition columns: + expr: p_mfgr + type: string + tag: -1 + value expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Extract + PTF Operator + Filter Operator + predicate: + expr: (_wcol0 <= 2) + type: boolean + Select Operator + expressions: + expr: _col2 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: min(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 
+ value expressions: + expr: _col1 + type: int + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: int + expr: _col0 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: int + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: int + expr: _col1 + type: string + tag: 1 + value expressions: + expr: _col0 + type: int + b + TableScan + alias: b + Reduce Output Operator + key expressions: + expr: p_size + type: int + expr: p_mfgr + type: string + sort order: ++ + Map-reduce partition columns: + expr: p_size + type: int + expr: p_mfgr + type: string + tag: 0 + value expressions: + expr: p_name + type: string + expr: p_mfgr + type: string + expr: p_size + type: int + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col1} {VALUE._col2} {VALUE._col5} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col1, _col2, _col5, _col11 + Filter Operator + predicate: + expr: ((1 = 1) and _col11 is null) + type: boolean + Select Operator + expressions: + expr: _col2 + type: string + expr: _col1 + type: string + expr: _col5 + type: int + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +order by p_mfgr, p_size +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select p_mfgr, p_name, p_size +from part b where b.p_size not in + (select min(p_size) + from (select p_mfgr, p_size, rank() over(partition by p_mfgr order by p_size) as r from part) a + where r <= 2 and b.p_mfgr = a.p_mfgr + ) +order by p_mfgr, p_size +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 almond antique salmon chartreuse burlywood 6 +Manufacturer#1 almond aquamarine burnished black steel 28 +Manufacturer#1 almond antique chartreuse lavender yellow 34 +Manufacturer#1 almond aquamarine pink moccasin thistle 42 +Manufacturer#2 almond antique violet chocolate turquoise 14 +Manufacturer#2 almond aquamarine sandy cyan gainsboro 18 +Manufacturer#2 almond aquamarine rose maroon antique 25 +Manufacturer#2 almond antique violet turquoise frosted 40 +Manufacturer#3 almond antique forest lavender goldenrod 14 +Manufacturer#3 almond antique chartreuse khaki white 17 +Manufacturer#3 almond antique metallic orange dim 19 +Manufacturer#3 almond antique olive coral navajo 45 +Manufacturer#4 almond antique gainsboro frosted violet 10 +Manufacturer#4 almond azure aquamarine papaya violet 12 +Manufacturer#4 almond aquamarine floral ivory bisque 27 +Manufacturer#4 almond antique violet mint lemon 39 +Manufacturer#5 almond antique medium spring khaki 6 +Manufacturer#5 almond azure blanched chiffon midnight 23 +Manufacturer#5 almond antique blue firebrick mint 31 +Manufacturer#5 
almond aquamarine dodger light gainsboro 46 +PREHOOK: query: -- non agg, non corr, Group By in Parent Query +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +PREHOOK: type: QUERY +PREHOOK: Input: default@lineitem +#### A masked pattern was here #### +POSTHOOK: query: -- non agg, non corr, Group By in Parent Query +select li.l_partkey, count(*) +from lineitem li +where li.l_linenumber = 1 and + li.l_orderkey not in (select l_orderkey from lineitem where l_shipmode = 'AIR') +group by li.l_partkey +POSTHOOK: type: QUERY +POSTHOOK: Input: default@lineitem +#### A masked pattern was here #### +450 1 +7068 1 +21636 1 +22630 1 +59694 1 +61931 1 +85951 1 +88035 1 +88362 1 +106170 1 +119477 1 +119767 1 +123076 1 +139636 1 +175839 1 +182052 1