diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g index ad9abce..bf35d60 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g @@ -222,7 +222,7 @@ subQuerySource @init { gParent.pushMsg("subquery source", state); } @after { gParent.popMsg(state); } : - LPAREN queryStatementExpression[false] RPAREN KW_AS? identifier -> ^(TOK_SUBQUERY queryStatementExpression identifier) + LPAREN queryStatementExpression RPAREN KW_AS? identifier -> ^(TOK_SUBQUERY queryStatementExpression identifier) ; //---------------------- Rules for parsing PTF clauses ----------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index bf78545..bef3acf 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -717,7 +717,7 @@ explainStatement : KW_EXPLAIN ( explainOption* execStatement -> ^(TOK_EXPLAIN execStatement explainOption*) | - KW_REWRITE queryStatementExpression[true] -> ^(TOK_EXPLAIN_SQ_REWRITE queryStatementExpression)) + KW_REWRITE queryStatementExpression -> ^(TOK_EXPLAIN_SQ_REWRITE queryStatementExpression)) ; explainOption @@ -729,7 +729,7 @@ explainOption execStatement @init { pushMsg("statement", state); } @after { popMsg(state); } - : queryStatementExpression[true] + : queryStatementExpression | loadStatement | exportStatement | importStatement @@ -2310,14 +2310,14 @@ setOperator | KW_UNION KW_DISTINCT? -> ^(TOK_UNIONDISTINCT) ; -queryStatementExpression[boolean topLevel] +queryStatementExpression : /* Would be nice to do this as a gated semantic perdicate But the predicate gets pushed as a lookahead decision. Calling rule doesnot know about topLevel */ - (w=withClause {topLevel}?)? - queryStatementExpressionBody[topLevel] { + (w=withClause)? + queryStatementExpressionBody { if ($w.tree != null) { $queryStatementExpressionBody.tree.insertChild(0, $w.tree); } @@ -2325,10 +2325,10 @@ queryStatementExpression[boolean topLevel] -> queryStatementExpressionBody ; -queryStatementExpressionBody[boolean topLevel] +queryStatementExpressionBody : - fromStatement[topLevel] - | regularBody[topLevel] + fromStatement + | regularBody ; withClause @@ -2338,16 +2338,16 @@ withClause cteStatement : - identifier KW_AS LPAREN queryStatementExpression[false] RPAREN + identifier KW_AS LPAREN queryStatementExpression RPAREN -> ^(TOK_SUBQUERY queryStatementExpression identifier) ; -fromStatement[boolean topLevel] +fromStatement : (singleFromStatement -> singleFromStatement) (u=setOperator r=singleFromStatement -> ^($u {$fromStatement.tree} $r) )* - -> {u != null && topLevel}? ^(TOK_QUERY + -> {u != null}? ^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY {$fromStatement.tree} @@ -2376,11 +2376,11 @@ The valuesClause rule below ensures that the parse tree for very similar to the tree for "insert into table FOO select a,b from BAR". Since virtual table name is implicit, it's represented as TOK_ANONYMOUS. */ -regularBody[boolean topLevel] +regularBody : i=insertClause ( - s=selectStatement[topLevel] + s=selectStatement {$s.tree.getFirstChildWithType(TOK_INSERT).replaceChildren(0, 0, $i.tree);} -> {$s.tree} | valuesClause @@ -2392,38 +2392,63 @@ regularBody[boolean topLevel] ) ) | - selectStatement[topLevel] + selectStatement ; -selectStatement[boolean topLevel] +atomSelectStatement : - ( s=selectClause f=fromClause? w=whereClause? g=groupByClause? h=havingClause? + win=window_clause? + -> ^(TOK_QUERY $f? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + $s $w? $g? $h? $win?)) + | + LPAREN! selectStatement RPAREN! + ; + +selectStatement + : + a=atomSelectStatement + set=setOpSelectStatement[$atomSelectStatement.tree]? o=orderByClause? c=clusterByClause? d=distributeByClause? sort=sortByClause? - win=window_clause? l=limitClause? - -> ^(TOK_QUERY $f? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) - $s $w? $g? $h? $o? $c? - $d? $sort? $win? $l?)) - ) - (set=setOpSelectStatement[$selectStatement.tree, topLevel])? + { + if(set == null){ + $a.tree.getFirstChildWithType(TOK_INSERT).addChild($o.tree); + $a.tree.getFirstChildWithType(TOK_INSERT).addChild($c.tree); + $a.tree.getFirstChildWithType(TOK_INSERT).addChild($d.tree); + $a.tree.getFirstChildWithType(TOK_INSERT).addChild($sort.tree); + $a.tree.getFirstChildWithType(TOK_INSERT).addChild($l.tree); + } + } -> {set == null}? - {$selectStatement.tree} + {$a.tree} -> {o==null && c==null && d==null && sort==null && l==null}? {$set.tree} - -> {throwSetOpException()} + -> ^(TOK_QUERY + ^(TOK_FROM + ^(TOK_SUBQUERY + {$set.tree} + {adaptor.create(Identifier, generateUnionAlias())} + ) + ) + ^(TOK_INSERT + ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) + ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF)) + $o? $c? $d? $sort? $l? + ) + ) ; -setOpSelectStatement[CommonTree t, boolean topLevel] +setOpSelectStatement[CommonTree t] : - (u=setOperator b=simpleSelectStatement + (u=setOperator b=atomSelectStatement -> {$setOpSelectStatement.tree != null && u.tree.getType()==HiveParser.TOK_UNIONDISTINCT}? ^(TOK_QUERY ^(TOK_FROM @@ -2454,15 +2479,8 @@ setOpSelectStatement[CommonTree t, boolean topLevel] ) -> ^(TOK_UNIONALL {$t} $b) )+ - o=orderByClause? - c=clusterByClause? - d=distributeByClause? - sort=sortByClause? - win=window_clause? - l=limitClause? - -> {o==null && c==null && d==null && sort==null && win==null && l==null && !topLevel}? - {$setOpSelectStatement.tree} - -> ^(TOK_QUERY + -> {$setOpSelectStatement.tree.getChild(0).getType()==HiveParser.TOK_UNIONALL}? + ^(TOK_QUERY ^(TOK_FROM ^(TOK_SUBQUERY {$setOpSelectStatement.tree} @@ -2472,27 +2490,15 @@ setOpSelectStatement[CommonTree t, boolean topLevel] ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF)) - $o? $c? $d? $sort? $win? $l? ) ) - ; - -simpleSelectStatement - : - selectClause - fromClause? - whereClause? - groupByClause? - havingClause? - ((window_clause) => window_clause)? - -> ^(TOK_QUERY fromClause? ^(TOK_INSERT ^(TOK_DESTINATION ^(TOK_DIR TOK_TMP_FILE)) - selectClause whereClause? groupByClause? havingClause? window_clause?)) + -> {$setOpSelectStatement.tree} ; selectStatementWithCTE : (w=withClause)? - selectStatement[true] { + selectStatement { if ($w.tree != null) { $selectStatement.tree.insertChild(0, $w.tree); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 4a44173..6ae731f 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -476,7 +476,7 @@ precedenceEqualOperator subQueryExpression : - LPAREN! selectStatement[true] RPAREN! + LPAREN! selectStatement RPAREN! ; precedenceEqualExpression diff --git a/ql/src/test/queries/clientpositive/union_paren.q b/ql/src/test/queries/clientpositive/union_paren.q new file mode 100644 index 0000000..0b38b68 --- /dev/null +++ b/ql/src/test/queries/clientpositive/union_paren.q @@ -0,0 +1,54 @@ +set hive.mapred.mode=nonstrict; + +explain select * from src union all select * from src; + +create table t1(c int); + +insert into t1 values (1),(1),(2); + +create table t2(c int); + +insert into t2 values (2),(1),(2); + +create table t3(c int); + +insert into t3 values (2),(3),(2); + +(select * from t1) union all select * from t2 union select * from t3 order by c; + +(select * from t1) union all (select * from t2 union select * from t3) order by c; + +(select * from src order by key limit 1); + +(select * from src) union all select * from src order by key limit 1; + +(select * from src limit 1) union all select * from src order by key limit 1; + +((select * from src)) union all select * from src order by key limit 1; + +select * from src union all ((select * from src)) order by key limit 1; + +select * from src union all ((select * from src limit 1)) order by key limit 1; + +select * from src union all (select * from src) order by key limit 1; + +(select * from src order by key) union all (select * from src) order by key limit 1; + +(select * from src order by key) union all (select * from src limit 1) order by key limit 1; + +select count(*) from (select key from src union select key from src)cool_cust; + +--similar tpcds q14 + +with cross_items as + (select key, k + from src, + (select iss.key k + from src iss + union all + select ics.key k + from src ics + ) x + where key = k +) +select * from cross_items order by key limit 1; diff --git a/ql/src/test/results/clientpositive/union_paren.q.out b/ql/src/test/results/clientpositive/union_paren.q.out new file mode 100644 index 0000000..2c96092 --- /dev/null +++ b/ql/src/test/results/clientpositive/union_paren.q.out @@ -0,0 +1,260 @@ +PREHOOK: query: explain select * from src union all select * from src +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from src union all select * from src +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Union + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: create table t1(c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1(c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: insert into t1 values (1),(1),(2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 values (1),(1),(2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.c EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table t2(c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create table t2(c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t2 values (2),(1),(2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__2 +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 values (2),(1),(2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__2 +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: create table t3(c int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t3 +POSTHOOK: query: create table t3(c int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t3 +PREHOOK: query: insert into t3 values (2),(3),(2) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__3 +PREHOOK: Output: default@t3 +POSTHOOK: query: insert into t3 values (2),(3),(2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__3 +POSTHOOK: Output: default@t3 +POSTHOOK: Lineage: t3.c EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: (select * from t1) union all select * from t2 union select * from t3 order by c +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: (select * from t1) union all select * from t2 union select * from t3 order by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1 +2 +3 +PREHOOK: query: (select * from t1) union all (select * from t2 union select * from t3) order by c +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: (select * from t1) union all (select * from t2 union select * from t3) order by c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +1 +1 +1 +2 +2 +3 +PREHOOK: query: (select * from src order by key limit 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: (select * from src order by key limit 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: (select * from src) union all select * from src order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: (select * from src) union all select * from src order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: (select * from src limit 1) union all select * from src order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: (select * from src limit 1) union all select * from src order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: ((select * from src)) union all select * from src order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: ((select * from src)) union all select * from src order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: select * from src union all ((select * from src)) order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src union all ((select * from src)) order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: select * from src union all ((select * from src limit 1)) order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src union all ((select * from src limit 1)) order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: select * from src union all (select * from src) order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select * from src union all (select * from src) order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: (select * from src order by key) union all (select * from src) order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: (select * from src order by key) union all (select * from src) order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: (select * from src order by key) union all (select * from src limit 1) order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: (select * from src order by key) union all (select * from src limit 1) order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +PREHOOK: query: select count(*) from (select key from src union select key from src)cool_cust +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from (select key from src union select key from src)cool_cust +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +309 +PREHOOK: query: --similar tpcds q14 + +with cross_items as + (select key, k + from src, + (select iss.key k + from src iss + union all + select ics.key k + from src ics + ) x + where key = k +) +select * from cross_items order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: --similar tpcds q14 + +with cross_items as + (select key, k + from src, + (select iss.key k + from src iss + union all + select ics.key k + from src ics + ) x + where key = k +) +select * from cross_items order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 0