diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java index 2e58b80..7043270 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/RowResolver.java @@ -195,17 +195,6 @@ public ColumnInfo get(String tab_alias, String col_alias) throws SemanticExcepti return ret; } - /** - * check if column name is already exist in RR - */ - public void checkColumn(String tableAlias, String columnAlias) throws SemanticException { - ColumnInfo prev = get(null, columnAlias); - if (prev != null && - (tableAlias == null || !tableAlias.equalsIgnoreCase(prev.getTabAlias()))) { - throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(columnAlias)); - } - } - public ArrayList getColumnInfos() { return rowSchema.getSignature(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 496f6a6..61f6ab6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -45,7 +45,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.common.StatsSetupConst.StatDB; @@ -2420,7 +2419,7 @@ private Operator genNotNullFilterForJoinSourcePlan(QB qb, Operator input, @SuppressWarnings("nls") private Integer genColListRegex(String colRegex, String tabAlias, ASTNode sel, ArrayList col_list, - RowResolver input, Integer pos, RowResolver output, List aliases, boolean subQuery) + RowResolver input, Integer pos, RowResolver output, List aliases) throws SemanticException { // The table alias should exist @@ -2478,9 +2477,6 @@ private Integer genColListRegex(String colRegex, String tabAlias, continue; } - if (subQuery) { - output.checkColumn(tmp[0], tmp[1]); - } ColumnInfo oColInfo = inputColsProcessed.get(colInfo); if (oColInfo == null) { ExprNodeColumnDesc expr = new ExprNodeColumnDesc(colInfo.getType(), @@ -3106,7 +3102,6 @@ private static boolean isRegex(String pattern, HiveConf conf) { posn++; } - boolean subQuery = qb.getParseInfo().getIsSubQ(); boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM); if (isInTransform) { @@ -3144,7 +3139,7 @@ private static boolean isRegex(String pattern, HiveConf conf) { } if (isUDTF && (selectStar = udtfExprType == HiveParser.TOK_FUNCTIONSTAR)) { genColListRegex(".*", null, (ASTNode) udtfExpr.getChild(0), - col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); + col_list, inputRR, pos, out_rwsch, qb.getAliases()); } } @@ -3266,7 +3261,7 @@ private static boolean isRegex(String pattern, HiveConf conf) { if (expr.getType() == HiveParser.TOK_ALLCOLREF) { pos = genColListRegex(".*", expr.getChildCount() == 0 ? null : getUnescapedName((ASTNode) expr.getChild(0)).toLowerCase(), - expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); + expr, col_list, inputRR, pos, out_rwsch, qb.getAliases()); selectStar = true; } else if (expr.getType() == HiveParser.TOK_TABLE_OR_COL && !hasAsClause && !inputRR.getIsExprResolver() @@ -3275,7 +3270,7 @@ private static boolean isRegex(String pattern, HiveConf conf) { // This can only happen without AS clause // We don't allow this for ExprResolver - the Group By case pos = genColListRegex(unescapeIdentifier(expr.getChild(0).getText()), - null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases(), subQuery); + null, expr, col_list, inputRR, pos, out_rwsch, qb.getAliases()); } else if (expr.getType() == HiveParser.DOT && expr.getChild(0).getType() == HiveParser.TOK_TABLE_OR_COL && inputRR.hasTableAlias(unescapeIdentifier(expr.getChild(0) @@ -3288,7 +3283,7 @@ private static boolean isRegex(String pattern, HiveConf conf) { pos = genColListRegex(unescapeIdentifier(expr.getChild(1).getText()), unescapeIdentifier(expr.getChild(0).getChild(0).getText() .toLowerCase()), expr, col_list, inputRR, pos, out_rwsch, - qb.getAliases(), subQuery); + qb.getAliases()); } else { // Case when this is an expression TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); @@ -3302,9 +3297,6 @@ private static boolean isRegex(String pattern, HiveConf conf) { colAlias = recommended; } col_list.add(exp); - if (subQuery) { - out_rwsch.checkColumn(tabAlias, colAlias); - } ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(pos), exp.getWritableObjectInspector(), tabAlias, false); @@ -8505,6 +8497,9 @@ private Operator genPostGroupByBodyPlan(Operator curr, String dest, QB qb, // ast expression is not a valid column name for table tmp[1] = colInfo.getInternalName(); } + if (newRR.get(alias, tmp[1]) != null) { + throw new SemanticException(ErrorMsg.AMBIGUOUS_COLUMN.getMsg(tmp[1] + " in " + alias)); + } newRR.put(alias, tmp[1], colInfo); } opParseCtx.get(curr).setRowResolver(newRR); diff --git ql/src/test/queries/clientpositive/complex_alias.q ql/src/test/queries/clientpositive/complex_alias.q new file mode 100644 index 0000000..e2810c3 --- /dev/null +++ ql/src/test/queries/clientpositive/complex_alias.q @@ -0,0 +1,46 @@ +CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE); + +INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows); + +EXPLAIN +SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ); + +SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ); diff --git ql/src/test/results/clientnegative/ambiguous_col.q.out ql/src/test/results/clientnegative/ambiguous_col.q.out index 237c21f..a2915a4 100644 --- ql/src/test/results/clientnegative/ambiguous_col.q.out +++ ql/src/test/results/clientnegative/ambiguous_col.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10007]: Ambiguous column reference key +FAILED: SemanticException [Error 10007]: Ambiguous column reference key in a diff --git ql/src/test/results/clientnegative/ambiguous_col0.q.out ql/src/test/results/clientnegative/ambiguous_col0.q.out index 237c21f..b59e828 100644 --- ql/src/test/results/clientnegative/ambiguous_col0.q.out +++ ql/src/test/results/clientnegative/ambiguous_col0.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10007]: Ambiguous column reference key +FAILED: SemanticException [Error 10007]: Ambiguous column reference key in t diff --git ql/src/test/results/clientnegative/ambiguous_col1.q.out ql/src/test/results/clientnegative/ambiguous_col1.q.out index 237c21f..b59e828 100644 --- ql/src/test/results/clientnegative/ambiguous_col1.q.out +++ ql/src/test/results/clientnegative/ambiguous_col1.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10007]: Ambiguous column reference key +FAILED: SemanticException [Error 10007]: Ambiguous column reference key in t diff --git ql/src/test/results/clientnegative/ambiguous_col2.q.out ql/src/test/results/clientnegative/ambiguous_col2.q.out index 237c21f..b59e828 100644 --- ql/src/test/results/clientnegative/ambiguous_col2.q.out +++ ql/src/test/results/clientnegative/ambiguous_col2.q.out @@ -1 +1 @@ -FAILED: SemanticException [Error 10007]: Ambiguous column reference key +FAILED: SemanticException [Error 10007]: Ambiguous column reference key in t diff --git ql/src/test/results/clientpositive/ambiguous_col.q.out ql/src/test/results/clientpositive/ambiguous_col.q.out index e8760f1..d583162 100644 --- ql/src/test/results/clientpositive/ambiguous_col.q.out +++ ql/src/test/results/clientpositive/ambiguous_col.q.out @@ -53,8 +53,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 + expressions: _col0 (type: string), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -124,8 +124,8 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -195,8 +195,8 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + expressions: _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/complex_alias.q.out ql/src/test/results/clientpositive/complex_alias.q.out new file mode 100644 index 0000000..d8264bd --- /dev/null +++ ql/src/test/results/clientpositive/complex_alias.q.out @@ -0,0 +1,269 @@ +PREHOOK: query: CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@agg1 +POSTHOOK: query: CREATE TABLE agg1 (col0 INT, col1 STRING, col2 DOUBLE) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@agg1 +PREHOOK: query: INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@agg1 +POSTHOOK: query: INSERT INTO TABLE agg1 select key,value,key from src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@agg1 +POSTHOOK: Lineage: agg1.col0 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: agg1.col1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: agg1.col2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +Warning: Shuffle Join JOIN[19][tables = [single_use_subq12, single_use_subq11]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: agg1 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col0 = col0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: '42' (type: string), col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col2 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + TableScan + alias: agg1 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (col0 = col0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: '41' (type: string), col0 (type: int) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Union + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: int), _col1 (type: string) + outputColumnNames: _col2, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col2 (type: int), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col1 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 {VALUE._col1} {VALUE._col2} + outputColumnNames: _col2, _col3 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col2 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: agg1 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: col0 (type: int), col2 (type: double) + outputColumnNames: col0, col2 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(col2) + keys: col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 17 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[19][tables = [single_use_subq12, single_use_subq11]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@agg1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT single_use_subq11.a1 AS a1, + single_use_subq11.a2 AS a2 +FROM (SELECT Sum(agg1.col2) AS a1 + FROM agg1 + GROUP BY agg1.col0) single_use_subq12 + JOIN (SELECT alias.a2 AS a0, + alias.a1 AS a1, + alias.a1 AS a2 + FROM (SELECT agg1.col1 AS a0, + '42' AS a1, + agg1.col0 AS a2 + FROM agg1 + UNION ALL + SELECT agg1.col1 AS a0, + '41' AS a1, + agg1.col0 AS a2 + FROM agg1) alias + GROUP BY alias.a2, + alias.a1) single_use_subq11 + ON ( single_use_subq11.a0 = single_use_subq11.a0 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@agg1 +#### A masked pattern was here #### +42 42 +41 41