diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 537af207ae..39cfb4f334 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -446,8 +446,17 @@ private static void processSetColsNode(ASTNode setCols, ASTSearcher searcher) { if (selExpr.getType() == HiveParser.QUERY_HINT) continue; assert selExpr.getType() == HiveParser.TOK_SELEXPR; assert selExpr.getChildCount() > 0; - // Examine the last child. It could be an alias. - Tree child = selExpr.getChild(selExpr.getChildCount() - 1); + + if (selExpr.getChildCount() > 1) { + if (extractAliases(selExpr, alias, newChildren, aliases)) { + continue; + } + setCols.token.setType(HiveParser.TOK_ALLCOLREF); + return; + } + + assert selExpr.getChildCount() == 1; + Tree child = selExpr.getChild(0); switch (child.getType()) { case HiveParser.TOK_SETCOLREF: // We have a nested setcolref. Process that and start from scratch TODO: use stack? @@ -504,6 +513,24 @@ private static void processSetColsNode(ASTNode setCols, ASTSearcher searcher) { } } + /** + * A UDTF can accept multiple aliases such as `AS (c1, c2, c3)`. In other cases, this selExpr + * should have only one alias like `AS c1` or `AS (c1)`; a malformed query will fail later. + * Returns false as soon as createChildColumnRef returns false for an alias, true otherwise. + */ + private static boolean extractAliases(final Tree selExpr, final String alias, + final List newChildren, final HashSet aliases) { + // Skip the first child since it's an expression. 
+ for (int i = 1; i < selExpr.getChildCount(); ++i) { + final Tree child = selExpr.getChild(i); + assert child.getType() == HiveParser.Identifier; + if (!createChildColumnRef(child, alias, newChildren, aliases)) { + return false; + } + } + return true; + } + private static boolean createChildColumnRef(Tree child, String alias, List newChildren, HashSet aliases) { String colAlias = child.getText(); diff --git ql/src/test/queries/clientnegative/udf_multiple_aliases.q ql/src/test/queries/clientnegative/udf_multiple_aliases.q new file mode 100644 index 0000000000..66b96a2b09 --- /dev/null +++ ql/src/test/queries/clientnegative/udf_multiple_aliases.q @@ -0,0 +1 @@ +SELECT isnull(null) AS (c1, c2); diff --git ql/src/test/queries/clientpositive/udtf_multiple_aliases.q ql/src/test/queries/clientpositive/udtf_multiple_aliases.q new file mode 100644 index 0000000000..9fa883a076 --- /dev/null +++ ql/src/test/queries/clientpositive/udtf_multiple_aliases.q @@ -0,0 +1,8 @@ +EXPLAIN +SELECT stack(1, 'a', 'b', 'c') AS (c1, c2, c3) +UNION ALL +SELECT stack(1, 'd', 'e', 'f') AS (c1, c2, c3); + +SELECT stack(1, 'a', 'b', 'c') AS (c1, c2, c3) +UNION ALL +SELECT stack(1, 'd', 'e', 'f') AS (c1, c2, c3); diff --git ql/src/test/results/clientnegative/udf_multiple_aliases.q.out ql/src/test/results/clientnegative/udf_multiple_aliases.q.out new file mode 100644 index 0000000000..1a99ebfd4f --- /dev/null +++ ql/src/test/results/clientnegative/udf_multiple_aliases.q.out @@ -0,0 +1 @@ +FAILED: SemanticException 1:28 AS clause has an invalid number of aliases. 
Error encountered near token 'c2' diff --git ql/src/test/results/clientpositive/udtf_multiple_aliases.q.out ql/src/test/results/clientpositive/udtf_multiple_aliases.q.out new file mode 100644 index 0000000000..dfcb571c5f --- /dev/null +++ ql/src/test/results/clientpositive/udtf_multiple_aliases.q.out @@ -0,0 +1,91 @@ +PREHOOK: query: EXPLAIN +SELECT stack(1, 'a', 'b', 'c') AS (c1, c2, c3) +UNION ALL +SELECT stack(1, 'd', 'e', 'f') AS (c1, c2, c3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT stack(1, 'a', 'b', 'c') AS (c1, c2, c3) +UNION ALL +SELECT stack(1, 'd', 'e', 'f') AS (c1, c2, c3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 'a' (type: string), 'b' (type: string), 'c' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + function name: stack + Select Operator + expressions: col0 (type: string), col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 'd' (type: string), 'e' (type: string), 'f' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE + function name: stack + Select Operator + expressions: col0 (type: string), col1 (type: string), col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Union + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT stack(1, 'a', 'b', 'c') AS (c1, c2, c3) +UNION ALL +SELECT stack(1, 'd', 'e', 'f') AS (c1, c2, c3) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: SELECT stack(1, 'a', 'b', 'c') AS (c1, c2, c3) +UNION ALL +SELECT stack(1, 'd', 'e', 'f') AS (c1, c2, c3) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +a b c +d e f