diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 64af7d1..231be83 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -430,24 +430,31 @@ precedenceEqualOperator subQueryExpression : LPAREN! selectStatement[true] RPAREN! - ; - + ; + precedenceEqualExpression : + (LPAREN precedenceBitwiseOrExpression COMMA) => precedenceEqualExpressionMutiple + | + precedenceEqualExpressionSingle + ; + +precedenceEqualExpressionSingle + : (left=precedenceBitwiseOrExpression -> $left) ( (KW_NOT precedenceEqualNegatableOperator notExpr=precedenceBitwiseOrExpression) - -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpression $notExpr)) + -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpressionSingle $notExpr)) | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression) - -> ^(precedenceEqualOperator $precedenceEqualExpression $equalExpr) + -> ^(precedenceEqualOperator $precedenceEqualExpressionSingle $equalExpr) | (KW_NOT KW_IN LPAREN KW_SELECT)=> (KW_NOT KW_IN subQueryExpression) - -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpression)) + -> ^(KW_NOT ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle)) | (KW_NOT KW_IN expressions) - -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions)) | (KW_IN LPAREN KW_SELECT)=> (KW_IN subQueryExpression) - -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpression) + -> ^(TOK_SUBQUERY_EXPR ^(TOK_SUBQUERY_OP KW_IN) subQueryExpression $precedenceEqualExpressionSingle) | (KW_IN expressions) - -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionSingle expressions) | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max) | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) @@ -458,9 +465,24 @@ precedenceEqualExpression expressions : - LPAREN expression (COMMA expression)* RPAREN -> expression* + LPAREN expression (COMMA expression)* RPAREN -> expression+ + ; + +//we transform the (col0, col1) in ((v00,v01),(v10,v11)) into struct(col0, col1) in (struct(v00,v01),struct(v10,v11)) +precedenceEqualExpressionMutiple + : + (LPAREN precedenceBitwiseOrExpression (COMMA precedenceBitwiseOrExpression)+ RPAREN -> ^(TOK_FUNCTION Identifier["struct"] precedenceBitwiseOrExpression+)) + ( (KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+) + | (KW_NOT KW_IN LPAREN expressionsToStruct (COMMA expressionsToStruct)+ RPAREN) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpressionMutiple expressionsToStruct+))) ; +expressionsToStruct + : + LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_FUNCTION Identifier["struct"] expression+) + ; + precedenceNotOperator : KW_NOT @@ -635,7 +657,7 @@ nonReserved | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_OFFLINE | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER - | KW_REGEXP | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE | KW_RLIKE + | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED @@ -668,5 +690,7 @@ sql11ReservedKeywordsUsedAsIdentifier | KW_LEFT | KW_LIKE | KW_LOCAL | KW_NONE | KW_NULL | KW_OF | KW_ORDER | KW_OUT | KW_OUTER | KW_PARTITION | KW_PERCENT | KW_PROCEDURE | KW_RANGE | KW_READS | KW_REVOKE | KW_RIGHT | KW_ROLLUP | KW_ROW | KW_ROWS | KW_SET | KW_SMALLINT | KW_TABLE | KW_TIMESTAMP | KW_TO | KW_TRIGGER | KW_TRUE - | KW_TRUNCATE | KW_UNION | KW_UPDATE | KW_USER | KW_USING | KW_VALUES | KW_WITH + | KW_TRUNCATE | KW_UNION | KW_UPDATE | KW_USER | KW_USING | KW_VALUES | KW_WITH +//The following two keywords come from MySQL. Although they are not keywords in SQL2011, they are reserved keywords in MySQL. + | KW_REGEXP | KW_RLIKE ; diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java index 61b5892..fad5aad 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsNegative.java @@ -30,7 +30,7 @@ /** * Parser tests for SQL11 Reserved KeyWords. Please find more information in - * HIVE-6617. Total number : 74 + * HIVE-6617. Total number : 74 + 2 (MySQL) */ public class TestSQL11ReservedKeyWordsNegative { private static HiveConf conf; @@ -1070,4 +1070,34 @@ public void testSQL11ReservedKeyWords_WITH() { ex.getMessage()); } } + + // MySQL reserved keywords. + @Test + public void testSQL11ReservedKeyWords_RLIKE() { + try { + parse("CREATE TABLE RLIKE (col STRING)"); + Assert.assertFalse("Expected ParseException", true); + } catch (ParseException ex) { + Assert + .assertEquals( + "Failure didn't match.", + "line 1:13 Failed to recognize predicate 'RLIKE'. Failed rule: 'identifier' in table name", + ex.getMessage()); + } + } + + @Test + public void testSQL11ReservedKeyWords_REGEXP() { + try { + parse("CREATE TABLE REGEXP (col STRING)"); + Assert.assertFalse("Expected ParseException", true); + } catch (ParseException ex) { + Assert + .assertEquals( + "Failure didn't match.", + "line 1:13 Failed to recognize predicate 'REGEXP'. Failed rule: 'identifier' in table name", + ex.getMessage()); + } + } + } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java index 4c84e91..ef5131d 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestSQL11ReservedKeyWordsPositive.java @@ -30,7 +30,7 @@ /** * Parser tests for SQL11 Reserved KeyWords. Please find more information in - * HIVE-6617. Total number : 74 + * HIVE-6617. Total number : 74 + 2 (MySQL) */ public class TestSQL11ReservedKeyWordsPositive { private static HiveConf conf; @@ -798,4 +798,25 @@ public void testSQL11ReservedKeyWords_WITH() throws ParseException { "(TOK_CREATETABLE (TOK_TABNAME WITH) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL col TOK_STRING)))", ast.toStringTree()); } + + // MySQL reserved keywords. + @Test + public void testSQL11ReservedKeyWords_RLIKE() throws ParseException { + ASTNode ast = parse("CREATE TABLE RLIKE (col STRING)"); + Assert + .assertEquals( + "AST doesn't match", + "(TOK_CREATETABLE (TOK_TABNAME RLIKE) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL col TOK_STRING)))", + ast.toStringTree()); + } + + @Test + public void testSQL11ReservedKeyWords_REGEXP() throws ParseException { + ASTNode ast = parse("CREATE TABLE REGEXP (col STRING)"); + Assert + .assertEquals( + "AST doesn't match", + "(TOK_CREATETABLE (TOK_TABNAME REGEXP) TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL col TOK_STRING)))", + ast.toStringTree()); + } } diff --git a/ql/src/test/queries/clientpositive/char_udf1.q b/ql/src/test/queries/clientpositive/char_udf1.q index 8848609..2ca33d4 100644 --- a/ql/src/test/queries/clientpositive/char_udf1.q +++ b/ql/src/test/queries/clientpositive/char_udf1.q @@ -74,10 +74,13 @@ select ltrim(c2) = ltrim(c4) from char_udf_1 limit 1; +-- In hive wiki page https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF +-- we only allow A regexp B, not regexp (A,B). + select - regexp(c2, 'val'), - regexp(c4, 'val'), - regexp(c2, 'val') = regexp(c4, 'val') + c2 regexp'val', + c4 regexp 'val', + c2 regexp 'val' = c4 regexp 'val' from char_udf_1 limit 1; select diff --git a/ql/src/test/queries/clientpositive/multi_column_in.q b/ql/src/test/queries/clientpositive/multi_column_in.q new file mode 100644 index 0000000..75d3415 --- /dev/null +++ b/ql/src/test/queries/clientpositive/multi_column_in.q @@ -0,0 +1,26 @@ +create table emps (empno int, deptno int, empname string); + +insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22"); + +select * from emps; + +select * from emps where (empno,deptno) in ((1,2),(3,2)); + +select * from emps where (empno,deptno) in ((1,2),(1,3)); + +explain +select * from emps where (empno+1,deptno) in ((1,2),(3,2)); + +select * from emps where empno in (1,2); + +select * from emps where empno in (1,2) and deptno > 2; + +explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)); + +select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)); + +select (empno*2)|1,substr(empname,1,1) from emps; + +select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2')); + +select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')); \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/varchar_udf1.q b/ql/src/test/queries/clientpositive/varchar_udf1.q index 395fb12..5b6d207 100644 --- a/ql/src/test/queries/clientpositive/varchar_udf1.q +++ b/ql/src/test/queries/clientpositive/varchar_udf1.q @@ -75,9 +75,9 @@ select from varchar_udf_1 limit 1; select - regexp(c2, 'val'), - regexp(c4, 'val'), - regexp(c2, 'val') = regexp(c4, 'val') + c2 regexp'val', + c4 regexp 'val', + c2 regexp 'val' = c4 regexp 'val' from varchar_udf_1 limit 1; select diff --git a/ql/src/test/results/clientpositive/multi_column_in.q.out b/ql/src/test/results/clientpositive/multi_column_in.q.out new file mode 100644 index 0000000..dd983d1 --- /dev/null +++ b/ql/src/test/results/clientpositive/multi_column_in.q.out @@ -0,0 +1,194 @@ +PREHOOK: query: create table emps (empno int, deptno int, empname string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emps +POSTHOOK: query: create table emps (empno int, deptno int, empname string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emps +PREHOOK: query: insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22") +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@emps +POSTHOOK: query: insert into table emps values (1,2,"11"),(1,2,"11"),(3,4,"33"),(1,3,"11"),(2,5,"22"),(2,5,"22") +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@emps +POSTHOOK: Lineage: emps.deptno EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: emps.empname SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: emps.empno EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: select * from emps +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +3 4 33 +1 3 11 +2 5 22 +2 5 22 +PREHOOK: query: select * from emps where (empno,deptno) in ((1,2),(3,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where (empno,deptno) in ((1,2),(3,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +PREHOOK: query: select * from emps where (empno,deptno) in ((1,2),(1,3)) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where (empno,deptno) in ((1,2),(1,3)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +1 3 11 +PREHOOK: query: explain +select * from emps where (empno+1,deptno) in ((1,2),(3,2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select * from emps where (empno+1,deptno) in ((1,2),(3,2)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct((empno + 1),deptno)) IN (const struct(1,2), const struct(3,2)) (type: boolean) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: empno (type: int), deptno (type: int), empname (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from emps where empno in (1,2) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where empno in (1,2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +1 3 11 +2 5 22 +2 5 22 +PREHOOK: query: select * from emps where empno in (1,2) and deptno > 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where empno in (1,2) and deptno > 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 3 11 +2 5 22 +2 5 22 +PREHOOK: query: explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +PREHOOK: type: QUERY +POSTHOOK: query: explain select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: emps + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (struct(((empno * 2) | 1),deptno)) IN (struct((empno + 1),2), struct((empno + 2),2)) (type: boolean) + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: empno (type: int), deptno (type: int), empname (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 18 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where ((empno*2)|1,deptno) in ((empno+1,2),(empno+2,2)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +1 2 11 +1 2 11 +PREHOOK: query: select (empno*2)|1,substr(empname,1,1) from emps +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select (empno*2)|1,substr(empname,1,1) from emps +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +3 1 +3 1 +7 3 +3 1 +5 2 +5 2 +PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2')) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+2,'2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +PREHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')) +PREHOOK: type: QUERY +PREHOOK: Input: default@emps +#### A masked pattern was here #### +POSTHOOK: query: select * from emps where ((empno*2)|1,substr(empname,1,1)) in ((empno+1,'2'),(empno+3,'2')) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emps +#### A masked pattern was here #### +2 5 22 +2 5 22