diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java index 1b6b33b..92cbabc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java @@ -494,9 +494,6 @@ public QBSubQuery(String outerQueryId, public ASTNode getSubQueryAST() { return subQueryAST; } - public ASTNode getOuterQueryExpression() { - return parentQueryExpression; - } public SubQueryTypeDef getOperator() { return operator; } @@ -526,15 +523,8 @@ void validateAndRewriteAST(RowResolver outerQueryRR, /* * Restriction.16.s :: Correlated Expression in Outer Query must not contain * unqualified column references. + * disabled : if it's obvious, we allow unqualified refs */ - if ( parentQueryExpression != null && !forHavingClause ) { - ASTNode u = SubQueryUtils.hasUnQualifiedColumnReferences(parentQueryExpression); - if ( u != null ) { - subQueryAST.setOrigin(originalSQASTOrigin); - throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( - u, "Correlating expression cannot contain unqualified column references.")); - } - } /* * Restriction 17.s :: SubQuery cannot use the same table alias as one used in @@ -664,12 +654,30 @@ void buildJoinCondition(RowResolver outerQueryRR, RowResolver sqRR, try { outerQueryCol = outerQueryRR.getExpression(parentQueryExpression); } catch(SemanticException se) { + // ignore } + ASTNode parentExpr = parentQueryExpression; + if (!forHavingClause) { + Set aliases = outerQueryRR.getRslvMap().keySet(); + if (notInCheck != null) { + aliases.remove(notInCheck.getAlias()); + } + String tableAlias = aliases.size() == 1 ? aliases.iterator().next() : null; + parentExpr = + SubQueryUtils.setQualifiedColumnReferences(parentExpr, tableAlias); + if (parentExpr == null) { + subQueryAST.setOrigin(originalSQASTOrigin); + throw new SemanticException(ErrorMsg.UNSUPPORTED_SUBQUERY_EXPRESSION.getMsg( + parentQueryExpression, + "Correlating expression contains ambiguous column references.")); + } + } + parentQueryJoinCond = SubQueryUtils.buildOuterQryToSQJoinCond( - getOuterQueryExpression(), - alias, - sqRR); + parentExpr, + alias, + sqRR); if ( outerQueryCol != null ) { rewriteCorrConjunctForHaving(parentQueryJoinCond, true, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java index 57868b7..87a7ced 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryUtils.java @@ -316,6 +316,32 @@ else if ( type == HiveParser.TOK_TABLE_OR_COL ) { } return null; } + + static ASTNode setQualifiedColumnReferences(ASTNode ast, String tableAlias) { + int type = ast.getType(); + if (type == HiveParser.DOT) { + return ast; + } + if (type == HiveParser.TOK_TABLE_OR_COL) { + if (tableAlias == null) { + return null; + } + String colName = SemanticAnalyzer.unescapeIdentifier(ast.getChild(0).getText()); + return SubQueryUtils.createColRefAST(tableAlias, colName); + } + + for (int i = 0; i < ast.getChildCount(); i++) { + ASTNode child = (ASTNode) ast.getChild(i); + ASTNode c = setQualifiedColumnReferences(child, tableAlias); + if (c == null) { + return null; + } + if (c != child) { + ast.setChild(i, c); + } + } + return ast; + } static ASTNode subQueryWhere(ASTNode insertClause) { if (insertClause.getChildCount() > 2 && @@ -335,7 +361,7 @@ static ASTNode buildOuterQryToSQJoinCond(ASTNode outerQueryExpr, RowResolver sqRR) { ASTNode node = (ASTNode) ParseDriver.adaptor.create(HiveParser.EQUAL, "="); node.addChild(outerQueryExpr); - node.addChild(buildSQJoinExpr(sqAlias, sqRR, false)); + node.addChild(buildSQJoinExpr(sqAlias, sqRR)); return node; } @@ -345,18 +371,16 @@ static ASTNode buildOuterQryToSQJoinCond(ASTNode outerQueryExpr, * this will build (. (TOK_TABLE_OR_COL Identifier[SQ_1]) Identifier[B]) * where 'SQ_1' is the alias generated for the SubQuery. */ - static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR, - boolean useInternalName) { + static ASTNode buildSQJoinExpr(String sqAlias, RowResolver sqRR) { List signature = sqRR.getRowSchema().getSignature(); ColumnInfo joinColumn = signature.get(0); String[] joinColName = sqRR.reverseLookup(joinColumn.getInternalName()); - return createColRefAST(sqAlias, useInternalName ? - joinColumn.getInternalName() : joinColName[1]); + return createColRefAST(sqAlias, joinColName[1]); } static ASTNode buildOuterJoinPostCond(String sqAlias, RowResolver sqRR) { - return isNull(buildSQJoinExpr(sqAlias, sqRR, false)); + return isNull(buildSQJoinExpr(sqAlias, sqRR)); } @SuppressWarnings("rawtypes") diff --git a/ql/src/test/queries/clientnegative/subquery_unqual_corr_expr.q b/ql/src/test/queries/clientnegative/subquery_unqual_corr_expr.q deleted file mode 100644 index 99ff9ca..0000000 --- a/ql/src/test/queries/clientnegative/subquery_unqual_corr_expr.q +++ /dev/null @@ -1,6 +0,0 @@ - - -select * -from src -where key in (select key from src) -; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/subquery_unqual_corr_expr.q b/ql/src/test/queries/clientpositive/subquery_unqual_corr_expr.q new file mode 100644 index 0000000..1d766a1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/subquery_unqual_corr_expr.q @@ -0,0 +1,8 @@ + +explain +select * from src tablesample (10 rows) where lower(key) in (select key from src); +select * from src tablesample (10 rows) where lower(key) in (select key from src); + +explain +select * from src tablesample (10 rows) where concat(key,value) not in (select key from src); +select * from src tablesample (10 rows) where concat(key,value) not in (select key from src); diff --git a/ql/src/test/results/clientnegative/subquery_unqual_corr_expr.q.out b/ql/src/test/results/clientnegative/subquery_unqual_corr_expr.q.out deleted file mode 100644 index f69a538..0000000 --- a/ql/src/test/results/clientnegative/subquery_unqual_corr_expr.q.out +++ /dev/null @@ -1 +0,0 @@ -FAILED: SemanticException [Error 10249]: Line 5:6 Unsupported SubQuery Expression 'key': Correlating expression cannot contain unqualified column references. diff --git a/ql/src/test/results/clientpositive/subquery_unqual_corr_expr.q.out b/ql/src/test/results/clientpositive/subquery_unqual_corr_expr.q.out new file mode 100644 index 0000000..e3dd5ed --- /dev/null +++ b/ql/src/test/results/clientpositive/subquery_unqual_corr_expr.q.out @@ -0,0 +1,249 @@ +PREHOOK: query: explain +select * from src tablesample (10 rows) where lower(key) in (select key from src) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from src tablesample (10 rows) where lower(key) in (select key from src) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 10 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: lower(key) is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: lower(key) (type: string) + sort order: + + Map-reduce partition columns: lower(key) (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 lower(key) (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from src tablesample (10 rows) where lower(key) in (select key from src) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src tablesample (10 rows) where lower(key) in (select key from src) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +165 val_165 +238 val_238 +255 val_255 +27 val_27 +278 val_278 +311 val_311 +409 val_409 +484 val_484 +86 val_86 +98 val_98 +Warning: Shuffle Join JOIN[16][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: explain +select * from src tablesample (10 rows) where concat(key,value) not in (select key from src) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from src tablesample (10 rows) where concat(key,value) not in (select key from src) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is null (type: boolean) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Group By Operator + keys: _col0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 10 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: concat(_col0, _col1) (type: string) + sort order: + + Map-reduce partition columns: concat(_col0, _col1) (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join0 to 1 + keys: + 0 concat(_col0, _col1) (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col5 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is null (type: boolean) + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 302 Data size: 3208 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[16][tables = [src, sq_1_notin_nullcheck]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: select * from src tablesample (10 rows) where concat(key,value) not in (select key from src) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src tablesample (10 rows) where concat(key,value) not in (select key from src) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +165 val_165 +238 val_238 +255 val_255 +278 val_278 +27 val_27 +311 val_311 +409 val_409 +484 val_484 +86 val_86 +98 val_98