diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 64af7d1..a10e0b1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -430,24 +430,31 @@ precedenceEqualOperator subQueryExpression : LPAREN! selectStatement[true] RPAREN! - ; - + ; + precedenceEqualExpression : + (LPAREN precedenceBitwiseOrExpression COMMA) => precedenceEqualExpressionMutiple + | + precedenceEqualExpressionSingle + ; + +precedenceEqualExpressionSingle + : (left=precedenceBitwiseOrExpression -> $left) ( (KW_NOT precedenceEqualNegatableOperator notExpr=precedenceBitwiseOrExpression) - -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpression $notExpr)) + -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpressionSingle $notExpr)) | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression) - -> ^(precedenceEqualOperator $precedenceEqualExpression $equalExpr) + -> ^(precedenceEqualOperator $precedenceEqualExpressionSingle $equalExpr) | (KW_NOT KW_IN LPAREN KW_SELECT)=> (KW_NOT KW_IN subQueryExpression) - -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpression)) + -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle)) | (KW_NOT KW_IN expressions) - -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions)) | (KW_IN LPAREN KW_SELECT)=> (KW_IN subQueryExpression) - -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpression) + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle) | (KW_IN expressions) - -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions) | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max) | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) @@ -458,9 +465,24 @@ precedenceEqualExpression expressions : - LPAREN expression (COMMA expression)* RPAREN -> expression* + LPAREN expression (COMMA expression)* RPAREN -> expression+ + ; + +//we transform the (col0, col1) in ((v00,v01),(v10,v11)) into struct(col0, col1) in (struct(v00,v01),struct(v10,v11)) +precedenceEqualExpressionMutiple + : + (LPAREN precedenceBitwiseOrExpression (COMMA precedenceBitwiseOrExpression)+ RPAREN -> ^(TOK_FUNCTION Identifier["struct"] precedenceBitwiseOrExpression+)) + ( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+) + | (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+))) ; +expressionsToStruct + : + LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_FUNCTION Identifier["struct"] expression+) + ; + precedenceNotOperator : KW_NOT @@ -635,7 +657,7 @@ nonReserved | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER - | KW_REGEXP | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE | KW_RLIKE + | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED diff --git a/ql/src/test/queries/clientpositive/multi_column_in.q b/ql/src/test/queries/clientpositive/multi_column_in.q new file mode 100644 index 0000000..75d3415 --- /dev/null +++ b/ql/src/test/queries/clientpositive/multi_column_in.q @@ -0,0 +1,26 @@ +create table emps (empno int, deptno int, empname string); + +insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22"); + +select * from emps; + +select * from emps where (empno,deptno) in ((1,2),(3,2)); + +select * from emps where (empno,deptno) in ((1,2),(1,3)); + +explain +select * from emps where (empno+1,deptno) in ((1,2),(3,2)); + +select * from emps where empno in (1,2); + +select * from emps where empno in (1,2) and deptno > 2; + +explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)); + +select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)); + +select (empno*2)|1,substr(empname,1,1) from emps; + +select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2')); + +select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')); \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/multi_column_in.q.out b/ql/src/test/results/clientpositive/multi_column_in.q.out new file mode 100644 index 0000000..dd983d1 --- /dev/null +++ b/ql/src/test/results/clientpositive/multi_column_in.q.out @@ -0,0 +1,194 @@ +PREHOOK: query: create table emps (empno int, deptno int, empname string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps (empno int, deptno int, empname string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@emps +POSTHOOK: query: insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.deptno EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: emps.empname SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: emps.empno EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from emps +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +3 4 33 +1 3 11 +2 5 22 +2 5 22 +PREHOOK: query: select * from emps where (empno,deptno) in ((1,2),(3,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where (empno,deptno) in ((1,2),(3,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +PREHOOK: query: select * from emps where (empno,deptno) in ((1,2),(1,3)) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where (empno,deptno) in ((1,2),(1,3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +1 3 11 +PREHOOK: query: explain +select * from emps where (empno+1,deptno) in ((1,2),(3,2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from emps where (empno+1,deptno) in ((1,2),(3,2)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct((empno + 1),deptno)) IN (const struct(1,2), const struct(3,2)) (type: boolean) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: empno (type: int), deptno (type: int), empname (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from emps where empno in (1,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where empno in (1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +1 3 11 +2 5 22 +2 5 22 +PREHOOK: query: select * from emps where empno in (1,2) and deptno > 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where empno in (1,2) and deptno > 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 3 11 +2 5 22 +2 5 22 +PREHOOK: query: explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(((empno * 2) | 1),deptno)) IN (struct((empno + 1),2), struct((empno + 2),2)) (type: boolean) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: empno (type: int), deptno (type: int), empname (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +PREHOOK: query: select (empno*2)|1,substr(empname,1,1) from emps +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select (empno*2)|1,substr(empname,1,1) from emps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +3 1 +3 1 +7 3 +3 1 +5 2 +5 2 +PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2')) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +2 5 22 +2 5 22