diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java index 02c4be9..3c5e9bc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBParseInfo.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -45,7 +46,7 @@ private final HashMap aliasToSrc; private final HashMap nameToDest; private final HashMap nameToSample; - private final Map exprToColumnAlias; + private final Map> destToAliases; private final Map destToSelExpr; private final HashMap destToWhereExpr; private final HashMap destToGroupby; @@ -112,7 +113,7 @@ public QBParseInfo(String alias, boolean isSubQ) { aliasToSrc = new HashMap(); nameToDest = new HashMap(); nameToSample = new HashMap(); - exprToColumnAlias = new HashMap(); + destToAliases = new HashMap>(); destToLateralView = new HashMap(); destToSelExpr = new LinkedHashMap(); destToWhereExpr = new HashMap(); @@ -142,25 +143,13 @@ public QBParseInfo(String alias, boolean isSubQ) { } - /* - * If a QB is such that the aggregation expressions need to be handled by - * the Windowing PTF; we invoke this function to clear the AggExprs on the dest. 
- */ - public void clearAggregationExprsForClause(String clause) { - destToAggregationExprs.get(clause).clear(); - } - - public void setAggregationExprsForClause(String clause, - LinkedHashMap aggregationTrees) { - destToAggregationExprs.put(clause, aggregationTrees); - } - public void addAggregationExprsForClause(String clause, LinkedHashMap aggregationTrees) { - if (destToAggregationExprs.containsKey(clause)) { - destToAggregationExprs.get(clause).putAll(aggregationTrees); - } else { + LinkedHashMap map = destToAggregationExprs.get(clause); + if (map == null) { destToAggregationExprs.put(clause, aggregationTrees); + } else { + map.putAll(aggregationTrees); } } @@ -206,8 +195,12 @@ public void clearDistinctFuncExprsForClause(String clause) { } } - public void setDistinctFuncExprsForClause(String clause, List ast) { - destToDistinctFuncExprs.put(clause, ast); + public void addDistinctFuncExprsForClause(String clause, List ast) { + List distincts = destToDistinctFuncExprs.get(clause); + if (distincts == null) { + destToDistinctFuncExprs.put(clause, distincts = new ArrayList()); + } + distincts.addAll(ast); } public List getDistinctFuncExprsForClause(String clause) { @@ -411,20 +404,17 @@ public void setTabSample(String alias, TableSample tableSample) { nameToSample.put(alias.toLowerCase(), tableSample); } - public String getExprToColumnAlias(ASTNode expr) { - return exprToColumnAlias.get(expr); - } - - public Map getAllExprToColumnAlias() { - return exprToColumnAlias; + public Map getAllAliasedColumnExprs(String dest) { + Map aliasedColumns = destToAliases.get(dest); + return aliasedColumns == null ? 
Collections.emptyMap() : aliasedColumns; } - public boolean hasExprToColumnAlias(ASTNode expr) { - return exprToColumnAlias.containsKey(expr); - } - - public void setExprToColumnAlias(ASTNode expr, String alias) { - exprToColumnAlias.put(expr, alias); + public void addAliasedColumnExpr(String dest, String alias, ASTNode expr) { + Map map = destToAliases.get(dest); + if (map == null) { + destToAliases.put(dest, map = new HashMap()); + } + map.put(alias, expr); } public void setDestLimit(String dest, Integer limit) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 1b7a41d..0121a52 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -538,26 +538,32 @@ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias) } } - private LinkedHashMap doPhase1GetAggregationsFromSelect( - ASTNode selExpr, QB qb, String dest) throws SemanticException { + private void doPhase1GetAggregations(ASTNode exprs, QB qb, String dest) + throws SemanticException { + QBParseInfo qbp = qb.getParseInfo(); // Iterate over the selects search for aggregation Trees. // Use String as keys to eliminate duplicate trees. 
- LinkedHashMap aggregationTrees = new LinkedHashMap(); + LinkedHashMap aggregationsTrees = new LinkedHashMap(); List wdwFns = new ArrayList(); - for (int i = 0; i < selExpr.getChildCount(); ++i) { - ASTNode function = (ASTNode) selExpr.getChild(i); + for (int i = 0; i < exprs.getChildCount(); ++i) { + ASTNode function = (ASTNode) exprs.getChild(i); if (function.getType() == HiveParser.TOK_SELEXPR || function.getType() == HiveParser.TOK_SUBQUERY_EXPR) { function = (ASTNode)function.getChild(0); } - doPhase1GetAllAggregations(function, aggregationTrees, wdwFns); + if (!doPhase1GetAllAggregations(function, aggregationsTrees, wdwFns) && + function.getType() == HiveParser.TOK_SELEXPR && function.getChildCount() == 2) { + // (hack) to resolve having clause referencing aliases in select clause + String alias = function.getChild(1).getText(); + qbp.addAliasedColumnExpr(dest, unescapeIdentifier(alias), function); + } } // window based aggregations are handled differently for (ASTNode wdwFn : wdwFns) { WindowingSpec spec = qb.getWindowingSpec(dest); - if(spec == null) { + if (spec == null) { queryProperties.setHasWindowing(true); spec = new WindowingSpec(); qb.addDestToWindowingSpec(dest, spec); @@ -575,20 +581,8 @@ public void doPhase1QBExpr(ASTNode ast, QBExpr qbexpr, String id, String alias) spec.addWindowFunction(wFnSpec); qb.getParseInfo().addWindowingExprToClause(dest, wFnSpec.getExpression()); } - - return aggregationTrees; - } - - private void doPhase1GetColumnAliasesFromSelect( - ASTNode selectExpr, QBParseInfo qbp) { - for (int i = 0; i < selectExpr.getChildCount(); ++i) { - ASTNode selExpr = (ASTNode) selectExpr.getChild(i); - if ((selExpr.getToken().getType() == HiveParser.TOK_SELEXPR) - && (selExpr.getChildCount() == 2)) { - String columnAlias = unescapeIdentifier(selExpr.getChild(1).getText()); - qbp.setExprToColumnAlias((ASTNode) selExpr.getChild(0), columnAlias); - } - } + qbp.addAggregationExprsForClause(dest, aggregationsTrees); + 
qbp.addDistinctFuncExprsForClause(dest, doPhase1GetDistinctFuncExprs(aggregationsTrees)); } /** @@ -601,8 +595,8 @@ private void doPhase1GetColumnAliasesFromSelect( * the aggregation subtree. * @throws SemanticException */ - private void doPhase1GetAllAggregations(ASTNode expressionTree, - HashMap aggregations, List wdwFns) throws SemanticException { + private boolean doPhase1GetAllAggregations(ASTNode expressionTree, + Map aggregations, List wdwFns) throws SemanticException { int exprTokenType = expressionTree.getToken().getType(); if (exprTokenType == HiveParser.TOK_FUNCTION || exprTokenType == HiveParser.TOK_FUNCTIONDI @@ -611,12 +605,12 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, if (expressionTree.getChild(expressionTree.getChildCount()-1).getType() == HiveParser.TOK_WINDOWSPEC) { wdwFns.add(expressionTree); - return; + return true; } - if (expressionTree.getChild(0).getType() == HiveParser.Identifier) { - String functionName = unescapeIdentifier(expressionTree.getChild(0) - .getText()); - if(FunctionRegistry.impliesOrder(functionName)) { + ASTNode child = (ASTNode) expressionTree.getChild(0); + if (child.getType() == HiveParser.Identifier) { + String functionName = unescapeIdentifier(child.getText()); + if (FunctionRegistry.impliesOrder(functionName)) { throw new SemanticException(ErrorMsg.MISSING_OVER_CLAUSE.getMsg(functionName)); } if (FunctionRegistry.getGenericUDAFResolver(functionName) != null) { @@ -626,17 +620,18 @@ private void doPhase1GetAllAggregations(ASTNode expressionTree, aggregations.put(expressionTree.toStringTree().toLowerCase(), expressionTree); FunctionInfo fi = FunctionRegistry.getFunctionInfo(functionName); if (!fi.isNative()) { - unparseTranslator.addIdentifierTranslation((ASTNode) expressionTree - .getChild(0)); + unparseTranslator.addIdentifierTranslation(child); } - return; + return false; } } } + boolean containsWindow = false; for (int i = 0; i < expressionTree.getChildCount(); i++) { - 
doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i), + containsWindow |= doPhase1GetAllAggregations((ASTNode) expressionTree.getChild(i), aggregations, wdwFns); } + return containsWindow; } private List doPhase1GetDistinctFuncExprs( @@ -1217,15 +1212,11 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1) posn++; } - if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) + if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM)) { queryProperties.setUsesScript(true); + } - LinkedHashMap aggregations = doPhase1GetAggregationsFromSelect(ast, - qb, ctx_1.dest); - doPhase1GetColumnAliasesFromSelect(ast, qbp); - qbp.setAggregationExprsForClause(ctx_1.dest, aggregations); - qbp.setDistinctFuncExprsForClause(ctx_1.dest, - doPhase1GetDistinctFuncExprs(aggregations)); + doPhase1GetAggregations(ast, qb, ctx_1.dest); break; case HiveParser.TOK_WHERE: @@ -1368,8 +1359,7 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1) case HiveParser.TOK_HAVING: qbp.setHavingExprForClause(ctx_1.dest, ast); - qbp.addAggregationExprsForClause(ctx_1.dest, - doPhase1GetAggregationsFromSelect(ast, qb, ctx_1.dest)); + doPhase1GetAggregations(ast, qb, ctx_1.dest); break; case HiveParser.KW_WINDOW: @@ -2440,10 +2430,14 @@ private Operator genHavingPlan(String dest, QB qb, Operator input, OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); - Map exprToColumnAlias = qb.getParseInfo().getAllExprToColumnAlias(); - for (ASTNode astNode : exprToColumnAlias.keySet()) { - if (inputRR.getExpression(astNode) != null) { - inputRR.put("", exprToColumnAlias.get(astNode), inputRR.getExpression(astNode)); + Map aliasToColumnExpr = qb.getParseInfo().getAllAliasedColumnExprs(dest); + + // (hack) to resolve having clause referencing aliases in select clause + Map resolved = new HashMap(); + for (Map.Entry entry : aliasToColumnExpr.entrySet()) { + ExprNodeDesc exprNodeDesc = 
genExprNodeDesc(entry.getValue(), inputRR); + if (exprNodeDesc != null) { + resolved.put(entry.getKey(), exprNodeDesc); } } ASTNode condn = (ASTNode) havingExpr.getChild(0); @@ -2453,7 +2447,7 @@ private Operator genHavingPlan(String dest, QB qb, Operator input, * so we invoke genFilterPlan to handle SubQuery algebraic transformation, * just as is done for SubQuery predicates appearing in the Where Clause. */ - Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, true); + Operator output = genFilterPlan(condn, qb, input, aliasToOpInfo, resolved); output = putOpInsertMap(output, inputRR); return output; } @@ -2472,12 +2466,14 @@ private Operator genPlanForSubQueryPredicate( @SuppressWarnings("nls") private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, Map aliasToOpInfo, - boolean forHavingClause) + Map resolved) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); + boolean forHavingClause = resolved != null; + /* * Handling of SubQuery Expressions: * if "Where clause contains no SubQuery expressions" then @@ -2614,7 +2610,7 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, } } - return genFilterPlan(qb, searchCond, input); + return genFilterPlan(qb, searchCond, input, resolved); } /** @@ -2628,13 +2624,13 @@ private Operator genFilterPlan(ASTNode searchCond, QB qb, Operator input, * the input operator */ @SuppressWarnings("nls") - private Operator genFilterPlan(QB qb, ASTNode condn, Operator input) - throws SemanticException { + private Operator genFilterPlan(QB qb, ASTNode condn, Operator input, + Map resolved) throws SemanticException { OpParseContext inputCtx = opParseCtx.get(input); RowResolver inputRR = inputCtx.getRowResolver(); Operator output = putOpInsertMap(OperatorFactory.getAndMakeChild( - new FilterDesc(genExprNodeDesc(condn, inputRR), false), new RowSchema( + new FilterDesc(genExprNodeDesc(condn, inputRR, resolved), 
false), new RowSchema( inputRR.getColumnInfos()), input), inputRR); if (LOG.isDebugEnabled()) { @@ -5100,7 +5096,7 @@ private Operator genGroupByPlan1ReduceMultiGBY(List dests, QB qb, Operat if (parseInfo.getWhrForClause(dest) != null) { ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, false); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, forwardOp, aliasToOpInfo, null); } // Generate GroupbyOperator @@ -7332,7 +7328,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, if ( joinSrcOp != null ) { ArrayList filter = joinTree.getFiltersForPushing().get(0); for (ASTNode cond : filter) { - joinSrcOp = genFilterPlan(qb, cond, joinSrcOp); + joinSrcOp = genFilterPlan(qb, cond, joinSrcOp, null); } } @@ -7387,7 +7383,7 @@ private Operator genJoinOperator(QB qb, QBJoinTree joinTree, Operator op = joinOp; for(ASTNode condn : joinTree.getPostJoinFilters() ) { - op = genFilterPlan(qb, condn, op); + op = genFilterPlan(qb, condn, op, null); } return op; } @@ -7556,7 +7552,7 @@ private void pushJoinFilters(QB qb, QBJoinTree joinTree, Operator srcOp = map.get(src); ArrayList filter = filters.get(pos); for (ASTNode cond : filter) { - srcOp = genFilterPlan(qb, cond, srcOp); + srcOp = genFilterPlan(qb, cond, srcOp, null); } map.put(src, srcOp); } @@ -8765,7 +8761,7 @@ private Operator genBodyPlan(QB qb, Operator input, Map aliasT if (qbp.getWhrForClause(dest) != null) { ASTNode whereExpr = qb.getParseInfo().getWhrForClause(dest); - curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, false); + curr = genFilterPlan((ASTNode) whereExpr.getChild(0), qb, curr, aliasToOpInfo, null); } if (qbp.getAggregationExprsForClause(dest).size() != 0 @@ -10272,11 +10268,16 @@ private void saveViewDefinition() throws SemanticException { /** * Generates an expression node descriptor for the expression with TypeCheckCtx. 
*/ - public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input) throws SemanticException { + return genExprNodeDesc(expr, input, (Map)null); + } + + public ExprNodeDesc genExprNodeDesc(ASTNode expr, RowResolver input, Map resolved) throws SemanticException { // Since the user didn't supply a customized type-checking context, // use default settings. TypeCheckCtx tcCtx = new TypeCheckCtx(input); + tcCtx.setColMapping(resolved); return genExprNodeDesc(expr, input, tcCtx); } @@ -14184,7 +14185,9 @@ private void validateNoHavingReferenceToAlias(QB qb, ASTNode havingExpr) throws OptiqSemanticException { QBParseInfo qbPI = qb.getParseInfo(); - Map exprToAlias = qbPI.getAllExprToColumnAlias(); + + String destName = qbPI.getClauseNames().iterator().next(); + Map aliasToExprs = qbPI.getAllAliasedColumnExprs(destName); /* * a mouthful, but safe: * - a QB is guaranteed to have atleast 1 destination @@ -14193,12 +14196,12 @@ private void validateNoHavingReferenceToAlias(QB qb, ASTNode havingExpr) Set aggExprs = qbPI.getDestToAggregationExprs().values() .iterator().next().keySet(); - for (Map.Entry selExpr : exprToAlias.entrySet()) { - ASTNode selAST = selExpr.getKey(); + for (Map.Entry selExpr : aliasToExprs.entrySet()) { + final ASTNode selAST = selExpr.getValue(); if (!aggExprs.contains(selAST.toStringTree().toLowerCase())) { continue; } - final String aliasToCheck = selExpr.getValue(); + final String aliasToCheck = selExpr.getKey(); final Set aliasReferences = new HashSet(); TreeVisitorAction action = new TreeVisitorAction() { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java index 3b6178f..1fe25ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckCtx.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hive.ql.parse; import 
org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; + +import java.util.Map; /** * This class implements the context information that is used for typechecking @@ -48,6 +51,11 @@ private ASTNode errorSrcNode; /** + * Maps a column alias to its already-resolved ExprNodeDesc. + */ + private Map colMapping; + + /** * Whether to allow stateful UDF invocations. */ private boolean allowStatefulFunctions; @@ -190,4 +198,12 @@ public boolean getallowIndexExpr() { public boolean getallowSubQueryExpr() { return allowSubQueryExpr; } + + public ExprNodeDesc getColMapping(String alias) { + return colMapping == null ? null : colMapping.get(alias); + } + + public void setColMapping(Map colMapping) { + this.colMapping = colMapping; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index e065983..49ef328 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -510,6 +510,10 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, } } else { if (colInfo == null) { + ExprNodeDesc resolved = ctx.getColMapping(tableOrCol); + if (resolved != null) { + return resolved; + } // It's not a column or a table alias. 
if (input.getIsExprResolver()) { ASTNode exprNode = expr; diff --git ql/src/test/queries/clientpositive/having3.q ql/src/test/queries/clientpositive/having3.q new file mode 100644 index 0000000..c28c080 --- /dev/null +++ ql/src/test/queries/clientpositive/having3.q @@ -0,0 +1,4 @@ +explain +select value,max(key)-min(key) as span from src tablesample (10 rows) group by value having span>=0; + +select value,max(key)-min(key) as span from src tablesample (10 rows) group by value having span>=0; diff --git ql/src/test/results/clientpositive/having3.q.out ql/src/test/results/clientpositive/having3.q.out new file mode 100644 index 0000000..ff7a389 --- /dev/null +++ ql/src/test/results/clientpositive/having3.q.out @@ -0,0 +1,80 @@ +PREHOOK: query: explain +select value,max(key)-min(key) as span from src tablesample (10 rows) group by value having span>=0 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select value,max(key)-min(key) as span from src tablesample (10 rows) group by value having span>=0 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 10 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), key (type: string) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(key), min(key) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), 
_col2 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((_col1 - _col2) >= 0) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), (_col1 - _col2) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select value,max(key)-min(key) as span from src tablesample (10 rows) group by value having span>=0 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value,max(key)-min(key) as span from src tablesample (10 rows) group by value having span>=0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_165 0.0 +val_238 0.0 +val_255 0.0 +val_27 0.0 +val_278 0.0 +val_311 0.0 +val_409 0.0 +val_484 0.0 +val_86 0.0 +val_98 0.0