diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index fa111cc..3fbe8e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -69,6 +69,14 @@ * If this QB represents a SubQuery predicate then this will point to the SubQuery object. */ private QBSubQuery subQueryPredicateDef; + + /* + * used to give a unique name to each SubQuery QB Currently there can be at + * most 2 SubQueries in a Query: 1 in the Where clause, and 1 in the Having + * clause. + */ + private int numSubQueryPredicates; + // results @@ -320,5 +328,13 @@ protected void setSubQueryDef(QBSubQuery subQueryPredicateDef) { protected QBSubQuery getSubQueryPredicateDef() { return subQueryPredicateDef; } + + protected int getNumSubQueryPredicates() { + return numSubQueryPredicates; + } + + protected int incrNumSubQueryPredicates() { + return ++numSubQueryPredicates; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index 3e8215d..dcfd6a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -4,11 +4,14 @@ import java.util.List; import java.util.Stack; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -129,14 +132,22 @@ public boolean refersSubQuery() { private final ASTNode rightExpr; private final ExprType leftExprType; private final ExprType rightExprType; - - public Conjunct(ASTNode leftExpr, ASTNode rightExpr, ExprType leftExprType, - ExprType rightExprType) { + private final ColumnInfo leftOuterColInfo; + private final ColumnInfo rightOuterColInfo; + + public Conjunct(ASTNode leftExpr, + ASTNode rightExpr, + ExprType leftExprType, + ExprType rightExprType, + ColumnInfo leftOuterColInfo, + ColumnInfo rightOuterColInfo) { super(); this.leftExpr = leftExpr; this.rightExpr = rightExpr; this.leftExprType = leftExprType; this.rightExprType = rightExprType; + this.leftOuterColInfo = leftOuterColInfo; + this.rightOuterColInfo = rightOuterColInfo; } public ASTNode getLeftExpr() { return leftExpr; @@ -173,16 +184,28 @@ boolean refersOuterOnly() { } return leftExprType.combine(rightExprType) == ExprType.REFERS_PARENT; } + public ColumnInfo getLeftOuterColInfo() { + return leftOuterColInfo; + } + public ColumnInfo getRightOuterColInfo() { + return rightOuterColInfo; + } } class ConjunctAnalyzer { RowResolver parentQueryRR; + boolean forHavingClause; + String parentQueryNewAlias; NodeProcessor defaultExprProcessor; Stack stack; - ConjunctAnalyzer(RowResolver parentQueryRR) { + ConjunctAnalyzer(RowResolver parentQueryRR, + boolean forHavingClause, + String parentQueryNewAlias) { this.parentQueryRR = parentQueryRR; defaultExprProcessor = new DefaultExprProcessor(); + this.forHavingClause = forHavingClause; + this.parentQueryNewAlias = parentQueryNewAlias; stack = new Stack(); } @@ -195,25 +218,34 @@ boolean refersOuterOnly() { * 3. All other expressions have a Type based on their children. * An Expr w/o children is assumed to refer to neither. */ - private ExprType analyzeExpr(ASTNode expr) { - ExprNodeDesc exprNode; + private ObjectPair analyzeExpr(ASTNode expr) { + ColumnInfo cInfo = null; + if ( forHavingClause ) { + try { + cInfo = parentQueryRR.getExpression(expr); + if ( cInfo != null) { + return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); + } + } catch(SemanticException se) { + } + } if ( expr.getType() == HiveParser.DOT) { ASTNode dot = firstDot(expr); - exprNode = resolveDot(dot); - if ( exprNode != null ) { - return ExprType.REFERS_PARENT; + cInfo = resolveDot(dot); + if ( cInfo != null ) { + return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); } - return ExprType.REFERS_SUBQUERY; + return ObjectPair.create(ExprType.REFERS_SUBQUERY, null); } else if ( expr.getType() == HiveParser.TOK_TABLE_OR_COL ) { - return ExprType.REFERS_SUBQUERY; + return ObjectPair.create(ExprType.REFERS_SUBQUERY, null); } else { ExprType exprType = ExprType.REFERS_NONE; int cnt = expr.getChildCount(); for(int i=0; i < cnt; i++) { ASTNode child = (ASTNode) expr.getChild(i); - exprType = exprType.combine(analyzeExpr(child)); + exprType = exprType.combine(analyzeExpr(child).getFirst()); } - return exprType; + return ObjectPair.create(exprType, null); } } @@ -234,13 +266,17 @@ Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException { if ( type == HiveParser.EQUAL ) { ASTNode left = (ASTNode) conjunct.getChild(0); ASTNode right = (ASTNode) conjunct.getChild(1); - ExprType leftType = analyzeExpr(left); - ExprType rightType = analyzeExpr(right); + ObjectPair leftInfo = analyzeExpr(left); + ObjectPair rightInfo = analyzeExpr(right); - return new Conjunct(left, right, leftType, rightType); + return new Conjunct(left, right, + leftInfo.getFirst(), rightInfo.getFirst(), + leftInfo.getSecond(), rightInfo.getSecond()); } else { - ExprType sqExprType = analyzeExpr(conjunct); - return new Conjunct(conjunct, null, sqExprType, null); + ObjectPair sqExprInfo = analyzeExpr(conjunct); + return new Conjunct(conjunct, null, + sqExprInfo.getFirst(), null, + sqExprInfo.getSecond(), sqExprInfo.getSecond()); } } @@ -248,16 +284,20 @@ Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException { * Try to resolve a qualified name as a column reference on the Parent Query's RowResolver. * Apply this logic on the leftmost(first) dot in an AST tree. */ - protected ExprNodeDesc resolveDot(ASTNode node) { + protected ColumnInfo resolveDot(ASTNode node) { try { TypeCheckCtx tcCtx = new TypeCheckCtx(parentQueryRR); String str = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(1).getText()); ExprNodeDesc idDesc = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); - return (ExprNodeDesc) - defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) + defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc); + if ( colDesc != null ) { + String[] qualName = parentQueryRR.reverseLookup(colDesc.getColumn()); + return parentQueryRR.get(qualName[0], qualName[1]); + } } catch(SemanticException se) { - return null; } + return null; } /* @@ -295,6 +335,8 @@ protected ASTNode firstDot(ASTNode dot) { private int numOfCorrelationExprsAddedToSQSelect; private boolean groupbyAddedToSQ; + + private int numOuterCorrExprsForHaving; public QBSubQuery(String outerQueryId, int sqIdx, @@ -311,6 +353,7 @@ public QBSubQuery(String outerQueryId, this.sqIdx = sqIdx; this.alias = "sq_" + this.sqIdx; this.numCorrExprsinSQ = 0; + this.numOuterCorrExprsForHaving = 0; String s = ctx.getTokenRewriteStream().toString( originalSQAST.getTokenStartIndex(), originalSQAST.getTokenStopIndex()); originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST); @@ -328,7 +371,9 @@ public SubQueryTypeDef getOperator() { return operator; } - void validateAndRewriteAST(RowResolver outerQueryRR) throws SemanticException { + void validateAndRewriteAST(RowResolver outerQueryRR, + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); @@ -359,7 +404,7 @@ void validateAndRewriteAST(RowResolver outerQueryRR) throws SemanticException { containsAggregationExprs = containsAggregationExprs | ( r == 1 ); } - rewrite(outerQueryRR); + rewrite(outerQueryRR, forHavingClause, outerQueryAlias); SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin); @@ -418,14 +463,28 @@ private void setJoinType() { } } - void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR) throws SemanticException { + void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode parentQueryJoinCond = null; if ( parentQueryExpression != null ) { + + ColumnInfo outerQueryCol = null; + try { + outerQueryCol = outerQueryRR.getExpression(parentQueryExpression); + } catch(SemanticException se) { + } + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( getOuterQueryExpression(), alias, sqRR); + + if ( outerQueryCol != null ) { + rewriteCorrConjunctForHaving(parentQueryJoinCond, true, + outerQueryAlias, outerQueryRR, outerQueryCol); + } } joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST); setJoinType(); @@ -495,7 +554,9 @@ String getNextCorrExprAlias() { * - If predicate is not correlated, let it remain in the SubQuery * where clause. */ - private void rewrite(RowResolver parentQueryRR) throws SemanticException { + private void rewrite(RowResolver parentQueryRR, + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); ASTNode whereClause = null; if ( subQueryAST.getChild(1).getChildCount() > 2 && @@ -511,7 +572,8 @@ private void rewrite(RowResolver parentQueryRR) throws SemanticException { List conjuncts = new ArrayList(); SubQueryUtils.extractConjuncts(searchCond, conjuncts); - ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR); + ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR, + forHavingClause, outerQueryAlias); ASTNode sqNewSearchCond = null; for(ASTNode conjunctAST : conjuncts) { @@ -545,6 +607,10 @@ private void rewrite(RowResolver parentQueryRR) throws SemanticException { ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias); if ( conjunct.getLeftExprType().refersSubQuery() ) { + if ( forHavingClause && conjunct.getRightOuterColInfo() != null ) { + rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, + parentQueryRR, conjunct.getRightOuterColInfo()); + } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( conjunctAST, sqExprForCorr, true); joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate); @@ -557,6 +623,10 @@ private void rewrite(RowResolver parentQueryRR) throws SemanticException { SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr()); } } else { + if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) { + rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, + parentQueryRR, conjunct.getLeftOuterColInfo()); + } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( conjunctAST, sqExprForCorr, false); joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate); @@ -642,4 +712,21 @@ public ASTNode getJoinConditionAST() { public int getNumOfCorrelationExprsAddedToSQSelect() { return numOfCorrelationExprsAddedToSQSelect; } + + private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode, + boolean refersLeft, + String outerQueryAlias, + RowResolver outerQueryRR, + ColumnInfo outerQueryCol) { + + String newColAlias = "_gby_sq_col_" + numOuterCorrExprsForHaving++; + ASTNode outerExprForCorr = SubQueryUtils.createColRefAST(outerQueryAlias, newColAlias); + if ( refersLeft ) { + conjunctASTNode.setChild(0, outerExprForCorr); + } else { + conjunctASTNode.setChild(1, outerExprForCorr); + } + outerQueryRR.put(outerQueryAlias, newColAlias, outerQueryCol); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 7979873..28c4c1c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1897,7 +1897,8 @@ private void extractJoinCondsFromWhereClause(QBJoinTree joinTree, QB qb, String } @SuppressWarnings("nls") - private Operator genHavingPlan(String dest, QB qb, Operator input) + private Operator genHavingPlan(String dest, QB qb, Operator input, + Map aliasToOpInfo) throws SemanticException { ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest); @@ -1912,21 +1913,19 @@ private Operator genHavingPlan(String dest, QB qb, Operator input) } ASTNode condn = (ASTNode) havingExpr.getChild(0); - Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( - inputRR.getColumnInfos()), input), inputRR); - + Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true); + output = putOpInsertMap(output, inputRR); return output; } @SuppressWarnings("nls") - private Operator genFilterPlan(String dest, QB qb, Operator input, - Map aliasToOpInfo) + private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, + Map aliasToOpInfo, + boolean forHavingClause) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); - ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); /* * Handling of SubQuery Expressions: @@ -1950,7 +1949,6 @@ private Operator genFilterPlan(String dest, QB qb, Operator input, * endif * endif */ - ASTNode searchCond = (ASTNode) whereExpr.getChild(0); List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); if ( subQueriesInOriginalTree.size() > 0 ) { @@ -1982,13 +1980,20 @@ private Operator genFilterPlan(String dest, QB qb, Operator input, ASTNode subQueryAST = subQueries.get(i); ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); - int sqIdx = i+1; + int sqIdx = qb.incrNumSubQueryPredicates(); clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, originalSubQueryAST, ctx); + + String havingInputAlias = null; + + if ( forHavingClause ) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToOpInfo.put(havingInputAlias, input); + } - subQuery.validateAndRewriteAST(inputRR); + subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias); QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); qbSQ.setSubQueryDef(subQuery); @@ -2014,7 +2019,7 @@ private Operator genFilterPlan(String dest, QB qb, Operator input, /* * Gen Join between outer Operator and SQ op */ - subQuery.buildJoinCondition(inputRR, sqRR); + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); QBJoinTree joinTree = genSQJoinTree(qb, subQuery, input, aliasToOpInfo); @@ -4410,14 +4415,15 @@ private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operat curr = forwardOp; if (parseInfo.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, forwardOp, aliasToOpInfo); + ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false); } // Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null); - curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb); + curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb, aliasToOpInfo); } return curr; @@ -7777,7 +7783,8 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT curr = inputs.get(dest); if (qbp.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, curr, aliasToOpInfo); + ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false); } if (qbp.getAggregationExprsForClause(dest).size() != 0 @@ -7804,7 +7811,7 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT } } - curr = genPostGroupByBodyPlan(curr, dest, qb); + curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo); } } else { curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo); @@ -7830,7 +7837,8 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT return inputs; } - private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) + private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, + Map aliasToOpInfo) throws SemanticException { QBParseInfo qbp = qb.getParseInfo(); @@ -7840,7 +7848,7 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) if (getGroupByForClause(qbp, dest).size() == 0) { throw new SemanticException("HAVING specified without GROUP BY"); } - curr = genHavingPlan(dest, qb, curr); + curr = genHavingPlan(dest, qb, curr, aliasToOpInfo); } diff --git ql/src/test/queries/clientpositive/subquery_exists_having.q ql/src/test/queries/clientpositive/subquery_exists_having.q new file mode 100644 index 0000000..690aa10 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_exists_having.q @@ -0,0 +1,60 @@ + + +-- no agg, corr +explain +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +; + +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +; + +-- view test +create view cv1 as +select b.key, count(*) as c +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +; + +select * from cv1; + +-- sq in from +select * +from (select b.key, count(*) + from src b + group by b.key + having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +) a +; + +-- join on agg +select b.key, min(b.value) +from src b +group by b.key +having exists ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_in_having.q ql/src/test/queries/clientpositive/subquery_in_having.q new file mode 100644 index 0000000..ac9d1e6 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_in_having.q @@ -0,0 +1,63 @@ + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part; + +-- non agg, non corr +explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +; + + +select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key; + +select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +; + +-- non agg, corr +explain + select key, value, count(*) +from src b +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key ) +; + +-- agg, non corr +explain +select p_mfgr, avg(p_size) +from part b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +; + +-- join on agg +select b.key, min(b.value) +from src b +group by b.key +having b.key in ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_notexists_having.q ql/src/test/queries/clientpositive/subquery_notexists_having.q new file mode 100644 index 0000000..7205d17 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_notexists_having.q @@ -0,0 +1,46 @@ + + +-- no agg, corr +explain +select * +from src b +group by key, value +having not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +; + +select * +from src b +group by key, value +having not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +; + + +-- distinct, corr +explain +select * +from src b +group by key, value +having not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_12' + ) +; + +select * +from src b +group by key, value +having not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_12' + ) +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_notin_having.q ql/src/test/queries/clientpositive/subquery_notin_having.q new file mode 100644 index 0000000..a586f02 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_notin_having.q @@ -0,0 +1,74 @@ +DROP TABLE part; + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part; + + +-- non agg, non corr +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +; + +-- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +; + +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +; + +-- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +; + +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +; \ No newline at end of file diff --git ql/src/test/results/clientpositive/subquery_exists_having.q.out ql/src/test/results/clientpositive/subquery_exists_having.q.out new file mode 100644 index 0000000..b3534aa --- /dev/null +++ ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -0,0 +1,290 @@ +PREHOOK: query: -- no agg, corr +explain +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- no agg, corr +explain +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (> (. (TOK_TABLE_OR_COL a) value) 'val_9'))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_9') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: -- view test +create view cv1 as +select b.key, count(*) as c +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: CREATEVIEW +POSTHOOK: query: -- view test +create view cv1 as +select b.key, count(*) as c +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@cv1 +PREHOOK: query: select * from cv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@cv1 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from cv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cv1 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: -- sq in from +select * +from (select b.key, count(*) + from src b + group by b.key + having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- sq in from +select * +from (select b.key, count(*) + from src b + group by b.key + having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: -- join on agg +select b.key, min(b.value) +from src b +group by b.key +having exists ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- join on agg +select b.key, min(b.value) +from src b +group by b.key +having exists ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out new file mode 100644 index 0000000..ee1fcc4 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -0,0 +1,783 @@ +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: -- non agg, non corr +explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: bigint + sort order: + + Map-reduce partition columns: + expr: _col0 + type: bigint + tag: 1 + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: bigint + sort order: + + Map-reduce partition columns: + expr: _col1 + type: bigint + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +409 3 +187 3 +403 3 +396 3 +384 3 +369 3 +498 3 +5 3 +35 3 +327 3 +167 3 +318 3 +316 3 +311 3 +298 3 +70 3 +90 3 +128 3 +273 3 +208 3 +199 3 +193 3 +0 3 +119 3 +480 3 +466 3 +454 3 +438 3 +431 3 +430 3 +417 3 +PREHOOK: query: -- non agg, corr +explain + select key, value, count(*) +from src b +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain + select key, value, count(*) +from src b +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL s1) key) '9') (= (. (TOK_TABLE_OR_COL s1) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col2 + type: bigint + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col2 + type: bigint + expr: _col1 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: bigint + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: bigint + expr: _col1 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: value, key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: value + type: string + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + value expressions: + expr: _col2 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: + expr: _col2 + type: bigint + expr: _col0 + type: string + outputColumnNames: _col0, _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- agg, non corr +explain +select p_mfgr, avg(p_size) +from part b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select p_mfgr, avg(p_size) +from part b +group by b.p_mfgr +having b.p_mfgr in + (select p_mfgr + from part + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_HAVING (< (- (TOK_FUNCTION max (TOK_TABLE_OR_COL p_size)) (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size))) 20)))) (. (TOK_TABLE_OR_COL b) p_mfgr))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + outputColumnNames: p_mfgr, p_size + Group By Operator + aggregations: + expr: avg(p_size) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: avg(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: double + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + sq_1:part + TableScan + alias: part + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_size + type: int + outputColumnNames: p_mfgr, p_size + Group By Operator + aggregations: + expr: max(p_size) + expr: min(p_size) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: int + expr: _col2 + type: int + Reduce Operator Tree: + Group By Operator + aggregations: + expr: max(VALUE._col0) + expr: min(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: ((_col1 - _col2) < 20) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- join on agg +select b.key, min(b.value) +from src b +group by b.key +having b.key in ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- join on agg +select b.key, min(b.value) +from src b +group by b.key +having b.key in ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out index 8dfb485..a917a13 100644 --- ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -189,7 +189,7 @@ STAGE PLANS: type: string expr: value type: string - sq_1:s1 + sq_2:s1 TableScan alias: s1 Filter Operator diff --git ql/src/test/results/clientpositive/subquery_notexists_having.q.out ql/src/test/results/clientpositive/subquery_notexists_having.q.out new file mode 100644 index 0000000..1c33caa --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notexists_having.q.out @@ -0,0 +1,455 @@ +PREHOOK: query: -- no agg, corr +explain +select * +from src b +group by key, value +having not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- no agg, corr +explain +select * +from src b +group by key, value +having not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (> (. (TOK_TABLE_OR_COL a) value) 'val_12')))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: string + expr: _col0 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col1 + type: string + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_12') + type: boolean + Select Operator + expressions: + expr: value + type: string + expr: key + type: string + outputColumnNames: _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col1 + type: string + expr: _col2 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col1 + type: string + expr: _col2 + type: string + tag: 1 + value expressions: + expr: _col2 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col2} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col8 + Filter Operator + predicate: + expr: ((1 = 1) and _col8 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src b +group by key, value +having not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +group by key, value +having not exists + (select a.key + from src a + where b.value = a.value and a.key = b.key and a.value > 'val_12' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +119 val_119 +12 val_12 +PREHOOK: query: -- distinct, corr +explain +select * +from src b +group by key, value +having not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- distinct, corr +explain +select * +from src b +group by key, value +having not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_12' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECTDI (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL b) value) (. (TOK_TABLE_OR_COL a) value)) (> (. (TOK_TABLE_OR_COL a) value) 'val_12')))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: string + sort order: + + Map-reduce partition columns: + expr: _col1 + type: string + tag: 1 + value expressions: + expr: _col1 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col1} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col6 + Filter Operator + predicate: + expr: ((1 = 1) and _col6 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_12') + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: key, value + Group By Operator + bucketGroup: false + keys: + expr: key + type: string + expr: value + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: string + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + bucketGroup: false + keys: + expr: KEY._col0 + type: string + expr: KEY._col1 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: string + outputColumnNames: _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select * +from src b +group by key, value +having not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_12' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * +from src b +group by key, value +having not exists + (select distinct a.key + from src a + where b.value = a.value and a.value > 'val_12' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +10 val_10 +100 val_100 +103 val_103 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +119 val_119 +12 val_12 diff --git ql/src/test/results/clientpositive/subquery_notin_having.q.out ql/src/test/results/clientpositive/subquery_notin_having.q.out new file mode 100644 index 0000000..f9598c2 --- /dev/null +++ ql/src/test/results/clientpositive/subquery_notin_having.q.out @@ -0,0 +1,700 @@ +PREHOOK: query: DROP TABLE part +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE part +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: -- non agg, non corr +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +explain +select key, count(*) +from src +group by key +having key not in + ( select key from src s1 + where s1.key > '12' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '12')))) (TOK_TABLE_OR_COL key)))))) + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '12') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((1 = 1) and _col4 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)) l) (TOK_SELEXPR (TOK_FUNCTION max (TOK_TABLE_OR_COL p_retailprice)) r) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_retailprice)) a)) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)))) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_WHERE (and (= (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)) (TOK_TABLE_OR_COL l)) (> (- (TOK_TABLE_OR_COL r) (TOK_TABLE_OR_COL l)) 600))))) (. (TOK_TABLE_OR_COL b) p_mfgr)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_retailprice + type: double + outputColumnNames: p_mfgr, p_retailprice + Group By Operator + aggregations: + expr: min(p_retailprice) + expr: max(p_retailprice) + expr: avg(p_retailprice) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + expr: _col3 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + expr: max(VALUE._col1) + expr: avg(VALUE._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: double + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: double + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: double + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + expr: _col1 + type: double + sort order: ++ + Map-reduce partition columns: + expr: _col0 + type: string + expr: _col1 + type: double + tag: 1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col7 + Filter Operator + predicate: + expr: ((1 = 1) and _col7 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a:part + TableScan + alias: part + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_retailprice + type: double + outputColumnNames: p_mfgr, p_retailprice + Group By Operator + aggregations: + expr: min(p_retailprice) + expr: max(p_retailprice) + expr: avg(p_retailprice) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + expr: _col3 + type: struct + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + expr: max(VALUE._col1) + expr: avg(VALUE._col2) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Filter Operator + predicate: + expr: ((_col2 - _col1) > 600) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from (select p_mfgr, min(p_retailprice) l, max(p_retailprice) r, avg(p_retailprice) a from part group by p_mfgr) a + where min(p_retailprice) = l and r - l > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68 +PREHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- agg, non corr +explain +select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (not (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_HAVING (> (- (TOK_FUNCTION max (TOK_TABLE_OR_COL p_retailprice)) (TOK_FUNCTION min (TOK_TABLE_OR_COL p_retailprice))) 600)))) (. (TOK_TABLE_OR_COL b) p_mfgr)))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_retailprice + type: double + outputColumnNames: p_mfgr, p_retailprice + Group By Operator + aggregations: + expr: min(p_retailprice) + expr: max(p_retailprice) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: min(VALUE._col0) + expr: max(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: double + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + value expressions: + expr: _col0 + type: string + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col5 + Filter Operator + predicate: + expr: ((1 = 1) and _col5 is null) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: double + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + sq_1:a + TableScan + alias: a + Select Operator + expressions: + expr: p_mfgr + type: string + expr: p_retailprice + type: double + outputColumnNames: p_mfgr, p_retailprice + Group By Operator + aggregations: + expr: max(p_retailprice) + expr: min(p_retailprice) + bucketGroup: false + keys: + expr: p_mfgr + type: string + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: double + expr: _col2 + type: double + Reduce Operator Tree: + Group By Operator + aggregations: + expr: max(VALUE._col0) + expr: min(VALUE._col1) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Filter Operator + predicate: + expr: ((_col1 - _col2) > 600) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@part +#### A masked pattern was here #### +POSTHOOK: query: select b.p_mfgr, min(p_retailprice) +from part b +group by b.p_mfgr +having b.p_mfgr not in + (select p_mfgr + from part a + group by p_mfgr + having max(p_retailprice) - min(p_retailprice) > 600 + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@part +#### A masked pattern was here #### +Manufacturer#1 1173.15 +Manufacturer#2 1690.68