diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java index fa111cc..3fbe8e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QB.java @@ -69,6 +69,14 @@ * If this QB represents a SubQuery predicate then this will point to the SubQuery object. */ private QBSubQuery subQueryPredicateDef; + + /* + * used to give a unique name to each SubQuery QB Currently there can be at + * most 2 SubQueries in a Query: 1 in the Where clause, and 1 in the Having + * clause. + */ + private int numSubQueryPredicates; + // results @@ -320,5 +328,13 @@ protected void setSubQueryDef(QBSubQuery subQueryPredicateDef) { protected QBSubQuery getSubQueryPredicateDef() { return subQueryPredicateDef; } + + protected int getNumSubQueryPredicates() { + return numSubQueryPredicates; + } + + protected int incrNumSubQueryPredicates() { + return ++numSubQueryPredicates; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index 3e8215d..dcfd6a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -4,11 +4,14 @@ import java.util.List; import java.util.Stack; +import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -129,14 +132,22 @@ public boolean refersSubQuery() { private final ASTNode rightExpr; private final ExprType leftExprType; private final ExprType rightExprType; - - public Conjunct(ASTNode leftExpr, ASTNode rightExpr, ExprType leftExprType, - ExprType rightExprType) { + private final ColumnInfo leftOuterColInfo; + private final ColumnInfo rightOuterColInfo; + + public Conjunct(ASTNode leftExpr, + ASTNode rightExpr, + ExprType leftExprType, + ExprType rightExprType, + ColumnInfo leftOuterColInfo, + ColumnInfo rightOuterColInfo) { super(); this.leftExpr = leftExpr; this.rightExpr = rightExpr; this.leftExprType = leftExprType; this.rightExprType = rightExprType; + this.leftOuterColInfo = leftOuterColInfo; + this.rightOuterColInfo = rightOuterColInfo; } public ASTNode getLeftExpr() { return leftExpr; @@ -173,16 +184,28 @@ boolean refersOuterOnly() { } return leftExprType.combine(rightExprType) == ExprType.REFERS_PARENT; } + public ColumnInfo getLeftOuterColInfo() { + return leftOuterColInfo; + } + public ColumnInfo getRightOuterColInfo() { + return rightOuterColInfo; + } } class ConjunctAnalyzer { RowResolver parentQueryRR; + boolean forHavingClause; + String parentQueryNewAlias; NodeProcessor defaultExprProcessor; Stack stack; - ConjunctAnalyzer(RowResolver parentQueryRR) { + ConjunctAnalyzer(RowResolver parentQueryRR, + boolean forHavingClause, + String parentQueryNewAlias) { this.parentQueryRR = parentQueryRR; defaultExprProcessor = new DefaultExprProcessor(); + this.forHavingClause = forHavingClause; + this.parentQueryNewAlias = parentQueryNewAlias; stack = new Stack(); } @@ -195,25 +218,34 @@ 
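The hunk below reworks analyzeExpr to return Hive's ObjectPair, pairing the ExprType classification with the outer-query ColumnInfo (non-null only when the expression resolved against the parent query's RowResolver). For orientation, this is how the caller in analyzeConjunct consumes it, with the generic parameters that the flattened diff drops restored — a sketch restating the patch's own code, not new behavior:

    ObjectPair<ExprType, ColumnInfo> leftInfo  = analyzeExpr(left);
    ObjectPair<ExprType, ColumnInfo> rightInfo = analyzeExpr(right);
    // The ColumnInfo slots ride on the Conjunct as leftOuterColInfo /
    // rightOuterColInfo and later feed rewriteCorrConjunctForHaving,
    // which re-points a correlated HAVING predicate at the Group By
    // output column of the outer query.
    return new Conjunct(left, right,
        leftInfo.getFirst(), rightInfo.getFirst(),
        leftInfo.getSecond(), rightInfo.getSecond());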
boolean refersOuterOnly() { * 3. All other expressions have a Type based on their children. * An Expr w/o children is assumed to refer to neither. */ - private ExprType analyzeExpr(ASTNode expr) { - ExprNodeDesc exprNode; + private ObjectPair analyzeExpr(ASTNode expr) { + ColumnInfo cInfo = null; + if ( forHavingClause ) { + try { + cInfo = parentQueryRR.getExpression(expr); + if ( cInfo != null) { + return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); + } + } catch(SemanticException se) { + } + } if ( expr.getType() == HiveParser.DOT) { ASTNode dot = firstDot(expr); - exprNode = resolveDot(dot); - if ( exprNode != null ) { - return ExprType.REFERS_PARENT; + cInfo = resolveDot(dot); + if ( cInfo != null ) { + return ObjectPair.create(ExprType.REFERS_PARENT, cInfo); } - return ExprType.REFERS_SUBQUERY; + return ObjectPair.create(ExprType.REFERS_SUBQUERY, null); } else if ( expr.getType() == HiveParser.TOK_TABLE_OR_COL ) { - return ExprType.REFERS_SUBQUERY; + return ObjectPair.create(ExprType.REFERS_SUBQUERY, null); } else { ExprType exprType = ExprType.REFERS_NONE; int cnt = expr.getChildCount(); for(int i=0; i < cnt; i++) { ASTNode child = (ASTNode) expr.getChild(i); - exprType = exprType.combine(analyzeExpr(child)); + exprType = exprType.combine(analyzeExpr(child).getFirst()); } - return exprType; + return ObjectPair.create(exprType, null); } } @@ -234,13 +266,17 @@ Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException { if ( type == HiveParser.EQUAL ) { ASTNode left = (ASTNode) conjunct.getChild(0); ASTNode right = (ASTNode) conjunct.getChild(1); - ExprType leftType = analyzeExpr(left); - ExprType rightType = analyzeExpr(right); + ObjectPair leftInfo = analyzeExpr(left); + ObjectPair rightInfo = analyzeExpr(right); - return new Conjunct(left, right, leftType, rightType); + return new Conjunct(left, right, + leftInfo.getFirst(), rightInfo.getFirst(), + leftInfo.getSecond(), rightInfo.getSecond()); } else { - ExprType sqExprType = analyzeExpr(conjunct); - return new Conjunct(conjunct, null, sqExprType, null); + ObjectPair sqExprInfo = analyzeExpr(conjunct); + return new Conjunct(conjunct, null, + sqExprInfo.getFirst(), null, + sqExprInfo.getSecond(), sqExprInfo.getSecond()); } } @@ -248,16 +284,20 @@ Conjunct analyzeConjunct(ASTNode conjunct) throws SemanticException { * Try to resolve a qualified name as a column reference on the Parent Query's RowResolver. * Apply this logic on the leftmost(first) dot in an AST tree. 
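   *
   * Example (illustrative, not part of the patch): for the correlated
   * predicate a.key = b.key inside a HAVING subquery, the leftmost DOT
   * of b.key is resolved here; the resulting ExprNodeColumnDesc is
   * reverse-looked-up in the parent RowResolver to recover its
   * (tab_alias, col_alias) pair, and that ColumnInfo is returned so the
   * conjunct can be classified as REFERS_PARENT.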
*/ - protected ExprNodeDesc resolveDot(ASTNode node) { + protected ColumnInfo resolveDot(ASTNode node) { try { TypeCheckCtx tcCtx = new TypeCheckCtx(parentQueryRR); String str = BaseSemanticAnalyzer.unescapeIdentifier(node.getChild(1).getText()); ExprNodeDesc idDesc = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, str); - return (ExprNodeDesc) - defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc); + ExprNodeColumnDesc colDesc = (ExprNodeColumnDesc) + defaultExprProcessor.process(node, stack, tcCtx, (Object) null, idDesc); + if ( colDesc != null ) { + String[] qualName = parentQueryRR.reverseLookup(colDesc.getColumn()); + return parentQueryRR.get(qualName[0], qualName[1]); + } } catch(SemanticException se) { - return null; } + return null; } /* @@ -295,6 +335,8 @@ protected ASTNode firstDot(ASTNode dot) { private int numOfCorrelationExprsAddedToSQSelect; private boolean groupbyAddedToSQ; + + private int numOuterCorrExprsForHaving; public QBSubQuery(String outerQueryId, int sqIdx, @@ -311,6 +353,7 @@ public QBSubQuery(String outerQueryId, this.sqIdx = sqIdx; this.alias = "sq_" + this.sqIdx; this.numCorrExprsinSQ = 0; + this.numOuterCorrExprsForHaving = 0; String s = ctx.getTokenRewriteStream().toString( originalSQAST.getTokenStartIndex(), originalSQAST.getTokenStopIndex()); originalSQASTOrigin = new ASTNodeOrigin("SubQuery", alias, s, alias, originalSQAST); @@ -328,7 +371,9 @@ public SubQueryTypeDef getOperator() { return operator; } - void validateAndRewriteAST(RowResolver outerQueryRR) throws SemanticException { + void validateAndRewriteAST(RowResolver outerQueryRR, + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); @@ -359,7 +404,7 @@ void validateAndRewriteAST(RowResolver outerQueryRR) throws SemanticException { containsAggregationExprs = containsAggregationExprs | ( r == 1 ); } - rewrite(outerQueryRR); + rewrite(outerQueryRR, forHavingClause, outerQueryAlias); SubQueryUtils.setOriginDeep(subQueryAST, originalSQASTOrigin); @@ -418,14 +463,28 @@ private void setJoinType() { } } - void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR) throws SemanticException { + void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode parentQueryJoinCond = null; if ( parentQueryExpression != null ) { + + ColumnInfo outerQueryCol = null; + try { + outerQueryCol = outerQueryRR.getExpression(parentQueryExpression); + } catch(SemanticException se) { + } + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( getOuterQueryExpression(), alias, sqRR); + + if ( outerQueryCol != null ) { + rewriteCorrConjunctForHaving(parentQueryJoinCond, true, + outerQueryAlias, outerQueryRR, outerQueryCol); + } } joinConditionAST = SubQueryUtils.andAST(parentQueryJoinCond, joinConditionAST); setJoinType(); @@ -495,7 +554,9 @@ String getNextCorrExprAlias() { * - If predicate is not correlated, let it remain in the SubQuery * where clause. 
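   *
   * Worked example (illustrative sketch, following the test query in
   * subquery_exists_having.q): for
   *   select b.key, count(*) from src b group by b.key
   *   having exists (select a.key from src a
   *                  where a.key = b.key and a.value > 'val_9')
   * the uncorrelated conjunct a.value > 'val_9' stays in the SubQuery
   * where clause, while the correlated conjunct a.key = b.key becomes
   * the join condition between the outer query's Group By output and
   * the SubQuery, with a.key added to the SubQuery's select and
   * group-by lists so the join can reference it.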
*/ - private void rewrite(RowResolver parentQueryRR) throws SemanticException { + private void rewrite(RowResolver parentQueryRR, + boolean forHavingClause, + String outerQueryAlias) throws SemanticException { ASTNode selectClause = (ASTNode) subQueryAST.getChild(1).getChild(1); ASTNode whereClause = null; if ( subQueryAST.getChild(1).getChildCount() > 2 && @@ -511,7 +572,8 @@ private void rewrite(RowResolver parentQueryRR) throws SemanticException { List conjuncts = new ArrayList(); SubQueryUtils.extractConjuncts(searchCond, conjuncts); - ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR); + ConjunctAnalyzer conjunctAnalyzer = new ConjunctAnalyzer(parentQueryRR, + forHavingClause, outerQueryAlias); ASTNode sqNewSearchCond = null; for(ASTNode conjunctAST : conjuncts) { @@ -545,6 +607,10 @@ private void rewrite(RowResolver parentQueryRR) throws SemanticException { ASTNode sqExprForCorr = SubQueryUtils.createColRefAST(alias, exprAlias); if ( conjunct.getLeftExprType().refersSubQuery() ) { + if ( forHavingClause && conjunct.getRightOuterColInfo() != null ) { + rewriteCorrConjunctForHaving(conjunctAST, false, outerQueryAlias, + parentQueryRR, conjunct.getRightOuterColInfo()); + } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( conjunctAST, sqExprForCorr, true); joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate); @@ -557,6 +623,10 @@ private void rewrite(RowResolver parentQueryRR) throws SemanticException { SubQueryUtils.addGroupExpressionToFront(gBy, conjunct.getLeftExpr()); } } else { + if ( forHavingClause && conjunct.getLeftOuterColInfo() != null ) { + rewriteCorrConjunctForHaving(conjunctAST, true, outerQueryAlias, + parentQueryRR, conjunct.getLeftOuterColInfo()); + } ASTNode joinPredciate = SubQueryUtils.alterCorrelatedPredicate( conjunctAST, sqExprForCorr, false); joinConditionAST = SubQueryUtils.andAST(joinConditionAST, joinPredciate); @@ -642,4 +712,21 @@ public ASTNode getJoinConditionAST() { public int getNumOfCorrelationExprsAddedToSQSelect() { return numOfCorrelationExprsAddedToSQSelect; } + + private void rewriteCorrConjunctForHaving(ASTNode conjunctASTNode, + boolean refersLeft, + String outerQueryAlias, + RowResolver outerQueryRR, + ColumnInfo outerQueryCol) { + + String newColAlias = "_gby_sq_col_" + numOuterCorrExprsForHaving++; + ASTNode outerExprForCorr = SubQueryUtils.createColRefAST(outerQueryAlias, newColAlias); + if ( refersLeft ) { + conjunctASTNode.setChild(0, outerExprForCorr); + } else { + conjunctASTNode.setChild(1, outerExprForCorr); + } + outerQueryRR.put(outerQueryAlias, newColAlias, outerQueryCol); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java index 908546e..1c5c7a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java @@ -96,8 +96,17 @@ public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { if (rowSchema.getSignature() == null) { rowSchema.setSignature(new ArrayList()); } - - rowSchema.getSignature().add(colInfo); + + /* + * allow multiple mappings to the same ColumnInfo. + * When a ColumnInfo is mapped multiple times, only the + * first inverse mapping is captured. 
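+   * e.g. (sketch, using the alias scheme this patch introduces):
+   * registering the same ColumnInfo ci twice,
+   *   put("b", "key", ci); put("gby_sq1", "_gby_sq_col_0", ci);
+   * adds ci to the row schema once, and reverseLookup on
+   * ci.getInternalName() keeps returning {"b", "key"}.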
+ */ + boolean colPresent = invRslvMap.containsKey(colInfo.getInternalName()); + + if ( !colPresent ) { + rowSchema.getSignature().add(colInfo); + } LinkedHashMap f_map = rslvMap.get(tab_alias); if (f_map == null) { @@ -106,10 +115,12 @@ public void put(String tab_alias, String col_alias, ColumnInfo colInfo) { } f_map.put(col_alias, colInfo); - String[] qualifiedAlias = new String[2]; - qualifiedAlias[0] = tab_alias; - qualifiedAlias[1] = col_alias; - invRslvMap.put(colInfo.getInternalName(), qualifiedAlias); + if ( !colPresent ) { + String[] qualifiedAlias = new String[2]; + qualifiedAlias[0] = tab_alias; + qualifiedAlias[1] = col_alias; + invRslvMap.put(colInfo.getInternalName(), qualifiedAlias); + } } public boolean hasTableAlias(String tab_alias) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 67ad6cb..cb8f9e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -1834,7 +1834,8 @@ private void parseJoinCondition(QBJoinTree joinTree, ASTNode joinCond, } @SuppressWarnings("nls") - private Operator genHavingPlan(String dest, QB qb, Operator input) + private Operator genHavingPlan(String dest, QB qb, Operator input, + Map aliasToOpInfo) throws SemanticException { ASTNode havingExpr = qb.getParseInfo().getHavingForClause(dest); @@ -1849,21 +1850,19 @@ private Operator genHavingPlan(String dest, QB qb, Operator input) } ASTNode condn = (ASTNode) havingExpr.getChild(0); - Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( - inputRR.getColumnInfos()), input), inputRR); - + Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true); + output = putOpInsertMap(output, inputRR); return output; } @SuppressWarnings("nls") - private Operator genFilterPlan(String dest, QB qb, Operator input, - Map aliasToOpInfo) + private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, + Map aliasToOpInfo, + boolean forHavingClause) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); - ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); /* * Handling of SubQuery Expressions: @@ -1887,7 +1886,6 @@ private Operator genFilterPlan(String dest, QB qb, Operator input, * endif * endif */ - ASTNode searchCond = (ASTNode) whereExpr.getChild(0); List subQueriesInOriginalTree = SubQueryUtils.findSubQueries(searchCond); if ( subQueriesInOriginalTree.size() > 0 ) { @@ -1919,13 +1917,20 @@ private Operator genFilterPlan(String dest, QB qb, Operator input, ASTNode subQueryAST = subQueries.get(i); ASTNode originalSubQueryAST = subQueriesInOriginalTree.get(i); - int sqIdx = i+1; + int sqIdx = qb.incrNumSubQueryPredicates(); clonedSearchCond = SubQueryUtils.rewriteParentQueryWhere(clonedSearchCond, subQueryAST); QBSubQuery subQuery = SubQueryUtils.buildSubQuery(qb.getId(), sqIdx, subQueryAST, originalSubQueryAST, ctx); + + String havingInputAlias = null; + + if ( forHavingClause ) { + havingInputAlias = "gby_sq" + sqIdx; + aliasToOpInfo.put(havingInputAlias, input); + } - subQuery.validateAndRewriteAST(inputRR); + subQuery.validateAndRewriteAST(inputRR, forHavingClause, havingInputAlias); QB qbSQ = new QB(subQuery.getOuterQueryId(), subQuery.getAlias(), true); qbSQ.setSubQueryDef(subQuery); @@ -1951,7 +1956,7 @@ private Operator 
genFilterPlan(String dest, QB qb, Operator input, /* * Gen Join between outer Operator and SQ op */ - subQuery.buildJoinCondition(inputRR, sqRR); + subQuery.buildJoinCondition(inputRR, sqRR, forHavingClause, havingInputAlias); QBJoinTree joinTree = genSQJoinTree(qb, subQuery, input, aliasToOpInfo); @@ -3114,8 +3119,10 @@ private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, .getInternalName(), "", false)); String field = getColumnInternalName(i); outputColumnNames.add(field); + ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), null, false); groupByOutputRowResolver.putExpression(grpbyExpr, - new ColumnInfo(field, exprInfo.getType(), null, false)); + oColInfo); + addAlternateGByKeyMappings(grpbyExpr, oColInfo, input, groupByOutputRowResolver); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } // For each aggregation @@ -3323,8 +3330,10 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, .getIsVirtualCol())); String field = getColumnInternalName(i); outputColumnNames.add(field); + ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false); groupByOutputRowResolver.putExpression(grpbyExpr, - new ColumnInfo(field, exprInfo.getType(), "", false)); + oColInfo); + addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo, groupByOutputRowResolver); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -4105,8 +4114,10 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, exprInfo.getTabAlias(), exprInfo.getIsVirtualCol())); String field = getColumnInternalName(i); outputColumnNames.add(field); + ColumnInfo oColInfo = new ColumnInfo(field, exprInfo.getType(), "", false); groupByOutputRowResolver2.putExpression(grpbyExpr, - new ColumnInfo(field, exprInfo.getType(), "", false)); + oColInfo); + addAlternateGByKeyMappings(grpbyExpr, oColInfo, reduceSinkOperatorInfo2, groupByOutputRowResolver2); colExprMap.put(field, groupByKeys.get(groupByKeys.size() - 1)); } @@ -4322,14 +4333,15 @@ private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operat curr = forwardOp; if (parseInfo.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, forwardOp, aliasToOpInfo); + ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false); } // Generate GroupbyOperator Operator groupByOperatorInfo = genGroupByPlanGroupByOperator(parseInfo, dest, curr, reduceSinkOperatorInfo, GroupByDesc.Mode.COMPLETE, null); - curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb); + curr = genPostGroupByBodyPlan(groupByOperatorInfo, dest, qb, aliasToOpInfo); } return curr; @@ -7689,7 +7701,8 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT curr = inputs.get(dest); if (qbp.getWhrForClause(dest) != null) { - curr = genFilterPlan(dest, qb, curr, aliasToOpInfo); + ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false); } if (qbp.getAggregationExprsForClause(dest).size() != 0 @@ -7716,7 +7729,7 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT } } - curr = genPostGroupByBodyPlan(curr, dest, qb); + curr = genPostGroupByBodyPlan(curr, dest, qb, aliasToOpInfo); } } else { curr = genGroupByPlan1ReduceMultiGBY(commonGroupByDestGroup, qb, input, aliasToOpInfo); @@ -7742,7 +7755,8 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT return inputs; } - private 
Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) + private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, + Map aliasToOpInfo) throws SemanticException { QBParseInfo qbp = qb.getParseInfo(); @@ -7752,7 +7766,7 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb) if (getGroupByForClause(qbp, dest).size() == 0) { throw new SemanticException("HAVING specified without GROUP BY"); } - curr = genHavingPlan(dest, qb, curr); + curr = genHavingPlan(dest, qb, curr, aliasToOpInfo); } @@ -10741,4 +10755,40 @@ private Operator genReduceSinkPlanForWindowing(WindowingSpec spec, return selSpec; } + private void addAlternateGByKeyMappings(ASTNode gByExpr, ColumnInfo colInfo, + Operator reduceSinkOp, RowResolver gByRR) { + if ( gByExpr.getType() == HiveParser.DOT + && gByExpr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL ) { + String tab_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr + .getChild(0).getChild(0).getText()); + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier( + gByExpr.getChild(1).getText()); + gByRR.put(tab_alias, col_alias, colInfo); + } else if ( gByExpr.getType() == HiveParser.TOK_TABLE_OR_COL ) { + String col_alias = BaseSemanticAnalyzer.unescapeIdentifier(gByExpr + .getChild(0).getText()); + String tab_alias = null; + /* + * If the input to the GBy has a tab alias for the column, then add an entry + * based on that tab_alias. + * For e.g. this query: + * select b.x, count(*) from t1 b group by x + * needs (tab_alias=b, col_alias=x) in the GBy RR. + * tab_alias=b comes from looking at the RowResolver that is the ancestor + * before any GBy/ReduceSinks added for the GBY operation. + */ + Operator parent = reduceSinkOp; + while ( parent instanceof ReduceSinkOperator || + parent instanceof GroupByOperator ) { + parent = parent.getParentOperators().get(0); + } + RowResolver parentRR = opParseCtx.get(parent).getRowResolver(); + try { + ColumnInfo pColInfo = parentRR.get(tab_alias, col_alias); + tab_alias = pColInfo == null ? 
null : pColInfo.getTabAlias(); + } catch(SemanticException se) { + } + gByRR.put(tab_alias, col_alias, colInfo); + } + } } diff --git ql/src/test/queries/clientpositive/groupby_resolution.q ql/src/test/queries/clientpositive/groupby_resolution.q new file mode 100644 index 0000000..a1fc18d --- /dev/null +++ ql/src/test/queries/clientpositive/groupby_resolution.q @@ -0,0 +1,21 @@ + + +set hive.map.aggr=false; +set hive.groupby.skewindata=false; +explain select key, count(*) from src b group by b.key; +explain select b.key, count(*) from src b group by key; + +set hive.map.aggr=false; +set hive.groupby.skewindata=true; +explain select key, count(*) from src b group by b.key; +explain select b.key, count(*) from src b group by key; + +set hive.map.aggr=true; +set hive.groupby.skewindata=false; +explain select key, count(*) from src b group by b.key; +explain select b.key, count(*) from src b group by key; + +set hive.map.aggr=true; +set hive.groupby.skewindata=true; +explain select key, count(*) from src b group by b.key; +explain select b.key, count(*) from src b group by key; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_exists_having.q ql/src/test/queries/clientpositive/subquery_exists_having.q new file mode 100644 index 0000000..690aa10 --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_exists_having.q @@ -0,0 +1,60 @@ + + +-- no agg, corr +explain +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +; + +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +; + +-- view test +create view cv1 as +select b.key, count(*) as c +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +; + +select * from cv1; + +-- sq in from +select * +from (select b.key, count(*) + from src b + group by b.key + having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +) a +; + +-- join on agg +select b.key, min(b.value) +from src b +group by b.key +having exists ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/subquery_in_having.q ql/src/test/queries/clientpositive/subquery_in_having.q new file mode 100644 index 0000000..80f1aab --- /dev/null +++ ql/src/test/queries/clientpositive/subquery_in_having.q @@ -0,0 +1,53 @@ + +-- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +); + +LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part; + +-- non agg, non corr +explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +; + + +select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key; + +select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) +; + +-- non agg, corr +explain + select key, value, count(*) +from src b +group by key, value +having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key ) +; + +-- agg, non corr +explain +select p_mfgr, avg(p_size) +from part b +group by b.p_mfgr +having 
b.p_mfgr in + (select p_mfgr + from part + group by p_mfgr + having max(p_size) - min(p_size) < 20 + ) +; \ No newline at end of file diff --git ql/src/test/results/clientpositive/groupby_resolution.q.out ql/src/test/results/clientpositive/groupby_resolution.q.out new file mode 100644 index 0000000..c426540 --- /dev/null +++ ql/src/test/results/clientpositive/groupby_resolution.q.out @@ -0,0 +1,688 @@ +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: complete + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partial1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select b.key, count(*) from src b 
group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partial1 + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. 
(TOK_TABLE_OR_COL b) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. 
(TOK_TABLE_OR_COL b) key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: partials + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: +#### A masked pattern was here #### + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: final + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + diff --git ql/src/test/results/clientpositive/subquery_exists_having.q.out ql/src/test/results/clientpositive/subquery_exists_having.q.out new file mode 100644 index 0000000..b3534aa --- /dev/null +++ ql/src/test/results/clientpositive/subquery_exists_having.q.out @@ -0,0 +1,290 @@ +PREHOOK: query: -- no agg, corr +explain +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +POSTHOOK: query: -- no agg, corr +explain +select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) key)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP exists) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (> (. 
(TOK_TABLE_OR_COL a) value) 'val_9'))))))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + sq_1:a + TableScan + alias: a + Filter Operator + predicate: + expr: (value > 'val_9') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: _col1 + Group By Operator + bucketGroup: false + keys: + expr: _col1 + type: string + mode: hash + outputColumnNames: _col0 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: 1 + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select b.key, count(*) +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: -- view test +create view cv1 as +select b.key, count(*) as c +from src b +group by b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +PREHOOK: type: CREATEVIEW +POSTHOOK: query: -- view test +create view cv1 as +select b.key, count(*) as c +from src b +group by 
b.key +having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@cv1 +PREHOOK: query: select * from cv1 +PREHOOK: type: QUERY +PREHOOK: Input: default@cv1 +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from cv1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@cv1 +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: -- sq in from +select * +from (select b.key, count(*) + from src b + group by b.key + having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +) a +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- sq in from +select * +from (select b.key, count(*) + from src b + group by b.key + having exists + (select a.key + from src a + where a.key = b.key and a.value > 'val_9' + ) +) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: -- join on agg +select b.key, min(b.value) +from src b +group by b.key +having exists ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- join on agg +select b.key, min(b.value) +from src b +group by b.key +having exists ( select a.key + from src a + where a.value > 'val_9' and a.value = min(b.value) + ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +90 val_90 +92 val_92 +95 val_95 +96 val_96 +97 val_97 +98 val_98 diff --git ql/src/test/results/clientpositive/subquery_in_having.q.out ql/src/test/results/clientpositive/subquery_in_having.q.out new file mode 100644 index 0000000..0d32e8c --- /dev/null +++ ql/src/test/results/clientpositive/subquery_in_having.q.out @@ -0,0 +1,755 @@ +PREHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- data setup +CREATE TABLE part( + p_partkey INT, + p_name STRING, + p_mfgr STRING, + p_brand STRING, + p_type STRING, + p_size INT, + p_container STRING, + p_retailprice DOUBLE, + p_comment STRING +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@part +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +PREHOOK: type: LOAD +PREHOOK: Output: default@part +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part_tiny.txt' overwrite into table part +POSTHOOK: type: LOAD +POSTHOOK: Output: default@part +PREHOOK: query: -- non agg, non corr +explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +PREHOOK: type: QUERY +POSTHOOK: query: -- non agg, non corr +explain + select key, count(*) +from src +group by key +having count(*) in (select count(*) from src s1 where s1.key > '9' group by s1.key ) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) 
(TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL s1) key) '9')) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-3 + Stage-3 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + sq_1:s1 + TableScan + alias: s1 + Filter Operator + predicate: + expr: (key > '9') + type: boolean + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col1 + type: bigint + outputColumnNames: _col0 + Group By Operator + bucketGroup: false + keys: + expr: _col0 + type: bigint + mode: hash + outputColumnNames: _col0 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + $INTNAME + TableScan + Reduce Output Operator + key expressions: + expr: _col0 + type: bigint + sort order: + + Map-reduce partition columns: + expr: _col0 + type: bigint + tag: 1 + $INTNAME1 + TableScan + Reduce Output Operator + key expressions: + expr: _col1 + type: bigint + sort order: + + Map-reduce partition columns: + expr: _col1 + type: bigint + tag: 0 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 + handleSkewJoin: false + outputColumnNames: _col0, _col1 + Filter Operator + predicate: + expr: (1 = 1) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + outputColumnNames: key + Group By Operator + aggregations: + expr: count() + bucketGroup: false + keys: + expr: key + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: bigint + Reduce Operator Tree: + Group By Operator + aggregations: + expr: count(VALUE._col0) + bucketGroup: 
false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select s1.key, count(*) from src s1 where s1.key > '9' group by s1.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+90	3
+92	1
+95	2
+96	1
+97	2
+98	2
+PREHOOK: query: select key, count(*)
+from src
+group by key
+having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key )
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key, count(*)
+from src
+group by key
+having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+409	3
+187	3
+403	3
+396	3
+384	3
+369	3
+498	3
+5	3
+35	3
+327	3
+167	3
+318	3
+316	3
+311	3
+298	3
+70	3
+90	3
+128	3
+273	3
+208	3
+199	3
+193	3
+0	3
+119	3
+480	3
+466	3
+454	3
+438	3
+431	3
+430	3
+417	3
+PREHOOK: query: -- non agg, corr
+explain
+ select key, value, count(*)
+from src b
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- non agg, corr
+explain
+ select key, value, count(*)
+from src b
+group by key, value
+having count(*) in (select count(*) from src s1 where s1.key > '9' and s1.value = b.value group by s1.key )
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (and (> (. (TOK_TABLE_OR_COL s1) key) '9') (= (. (TOK_TABLE_OR_COL s1) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL s1) key)))) (TOK_FUNCTIONSTAR count)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: key, value
+              Group By Operator
+                aggregations:
+                      expr: count()
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  sort order: ++
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col2
+                        type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col2
+                    type: bigint
+                    expr: _col1
+                    type: string
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: _col2
+                    type: bigint
+                    expr: _col1
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: bigint
+        $INTNAME1
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: bigint
+                    expr: _col1
+                    type: string
+              sort order: ++
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: bigint
+                    expr: _col1
+                    type: string
+              tag: 1
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
+            1
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col2
+          Filter Operator
+            predicate:
+                expr: (1 = 1)
+                type: boolean
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+                    expr: _col2
+                    type: bigint
+              outputColumnNames: _col0, _col1, _col2
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        sq_1:s1
+          TableScan
+            alias: s1
+            Filter Operator
+              predicate:
+                  expr: (key > '9')
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: value
+                      type: string
+                      expr: key
+                      type: string
+                outputColumnNames: value, key
+                Group By Operator
+                  aggregations:
+                        expr: count()
+                  bucketGroup: false
+                  keys:
+                        expr: value
+                        type: string
+                        expr: key
+                        type: string
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    sort order: ++
+                    Map-reduce partition columns:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    tag: -1
+                    value expressions:
+                          expr: _col2
+                          type: bigint
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: count(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+                expr: KEY._col1
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Select Operator
+            expressions:
+                  expr: _col2
+                  type: bigint
+                  expr: _col0
+                  type: string
+            outputColumnNames: _col0, _col1
+            Group By Operator
+              bucketGroup: false
+              keys:
+                    expr: _col0
+                    type: bigint
+                    expr: _col1
+                    type: string
+              mode: hash
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: -- agg, non corr
+explain
+select p_mfgr, avg(p_size)
+from part b
+group by b.p_mfgr
+having b.p_mfgr in
+  (select p_mfgr
+  from part
+  group by p_mfgr
+  having max(p_size) - min(p_size) < 20
+  )
+PREHOOK: type: QUERY
+POSTHOOK: query: -- agg, non corr
+explain
+select p_mfgr, avg(p_size)
+from part b
+group by b.p_mfgr
+having b.p_mfgr in
+  (select p_mfgr
+  from part
+  group by p_mfgr
+  having max(p_size) - min(p_size) < 20
+  )
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part) b)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL p_size)))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL b) p_mfgr)) (TOK_HAVING (TOK_SUBQUERY_EXPR (TOK_SUBQUERY_OP in) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME part))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL p_mfgr))) (TOK_GROUPBY (TOK_TABLE_OR_COL p_mfgr)) (TOK_HAVING (< (- (TOK_FUNCTION max (TOK_TABLE_OR_COL p_size)) (TOK_FUNCTION min (TOK_TABLE_OR_COL p_size))) 20)))) (. (TOK_TABLE_OR_COL b) p_mfgr)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1, Stage-3
+  Stage-3 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: p_mfgr
+                    type: string
+                    expr: p_size
+                    type: int
+              outputColumnNames: p_mfgr, p_size
+              Group By Operator
+                aggregations:
+                      expr: avg(p_size)
+                bucketGroup: false
+                keys:
+                      expr: p_mfgr
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: struct
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: avg(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        $INTNAME
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 0
+              value expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: double
+        $INTNAME1
+          TableScan
+            Reduce Output Operator
+              key expressions:
+                    expr: _col0
+                    type: string
+              sort order: +
+              Map-reduce partition columns:
+                    expr: _col0
+                    type: string
+              tag: 1
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Left Semi Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1
+          Filter Operator
+            predicate:
+                expr: (1 = 1)
+                type: boolean
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: double
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-3
+    Map Reduce
+      Alias -> Map Operator Tree:
+        sq_1:part
+          TableScan
+            alias: part
+            Select Operator
+              expressions:
+                    expr: p_mfgr
+                    type: string
+                    expr: p_size
+                    type: int
+              outputColumnNames: p_mfgr, p_size
+              Group By Operator
+                aggregations:
+                      expr: max(p_size)
+                      expr: min(p_size)
+                bucketGroup: false
+                keys:
+                      expr: p_mfgr
+                      type: string
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Reduce Output Operator
+                  key expressions:
+                        expr: _col0
+                        type: string
+                  sort order: +
+                  Map-reduce partition columns:
+                        expr: _col0
+                        type: string
+                  tag: -1
+                  value expressions:
+                        expr: _col1
+                        type: int
+                        expr: _col2
+                        type: int
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: max(VALUE._col0)
+                expr: min(VALUE._col1)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Filter Operator
+            predicate:
+                expr: ((_col1 - _col2) < 20)
+                type: boolean
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+              outputColumnNames: _col0
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: _col0
+                      type: string
+                mode: hash
+                outputColumnNames: _col0
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
diff --git ql/src/test/results/clientpositive/subquery_multiinsert.q.out ql/src/test/results/clientpositive/subquery_multiinsert.q.out
index 8dfb485..a917a13 100644
--- ql/src/test/results/clientpositive/subquery_multiinsert.q.out
+++ ql/src/test/results/clientpositive/subquery_multiinsert.q.out
@@ -189,7 +189,7 @@ STAGE PLANS:
                     type: string
                     expr: value
                     type: string
-        sq_1:s1
+        sq_2:s1
           TableScan
             alias: s1
             Filter Operator