diff --git ql/pom.xml ql/pom.xml
index 489c6f3..b17288c 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -761,6 +761,7 @@
                   **/HiveLexer.g
                   **/HiveParser.g
+                  **/HintParser.g
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index e9bf3e4..6910f70 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -3400,7 +3400,7 @@ private RelNode genSelectLogicalPlan(QB qb, RelNode srcRel, RelNode starSrcRel)
       // TODO: Handle Query Hints; currently we ignore them
       boolean selectStar = false;
       int posn = 0;
-      boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
+      boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
       if (hintPresent) {
         String hint = ctx.getTokenRewriteStream().toString(
           selExprList.getChild(0).getTokenStartIndex(),
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HintParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HintParser.g
new file mode 100644
index 0000000..8e70a46
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HintParser.g
@@ -0,0 +1,83 @@
+/**
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+parser grammar HintParser;
+
+options
+{
+  tokenVocab=HiveLexer;
+  output=AST;
+  ASTLabelType=ASTNode;
+  backtrack=false;
+  k=3;
+}
+
+tokens {
+  TOK_HINTLIST;
+  TOK_HINT;
+  TOK_MAPJOIN;
+  TOK_STREAMTABLE;
+  TOK_HINTARGLIST;
+}
+
+@header {
+package org.apache.hadoop.hive.ql.parse;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+}
+
+
+@members {
+  ArrayList<ParseError> errors = new ArrayList<ParseError>();
+
+  @Override
+  public void displayRecognitionError(String[] tokenNames,
+      RecognitionException e) {
+    errors.add(new ParseError(this, e, tokenNames));
+  }
+}
+
+// starting rule
+hint
+  : hintList EOF -> ^(TOK_HINTLIST hintList)
+  ;
+
+hintList
+  :
+  hintItem (COMMA hintItem)* -> hintItem+
+  ;
+
+hintItem
+  :
+  hintName (LPAREN hintArgs RPAREN)? -> ^(TOK_HINT hintName hintArgs?)
+  ;
+
+hintName
+  :
+  KW_MAPJOIN -> TOK_MAPJOIN
+  | KW_STREAMTABLE -> TOK_STREAMTABLE
+  ;
+
+hintArgs
+  :
+  hintArgName (COMMA hintArgName)* -> ^(TOK_HINTARGLIST hintArgName+)
+  ;
+
+hintArgName
+  :
+  Identifier
+  ;
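Note: for orientation, this is the subtree the new grammar yields. A minimal
sketch, assuming a Hive test classpath; parseHint is the ParseDriver entry
point added later in this patch, and toStringTree() is inherited from ANTLR's
CommonTree:

    ParseDriver pd = new ParseDriver();
    // HiveLexer (changed below) strips "/*+" and "*/" before the hint body
    // ever reaches this method.
    ASTNode hints = pd.parseHint("MAPJOIN(a), STREAMTABLE(b)");
    // Prints, modulo whitespace:
    // (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))
    //               (TOK_HINT TOK_STREAMTABLE (TOK_HINTARGLIST b)))
    System.out.println(hints.toStringTree());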
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index e9ccfd2..a36a5d2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -488,8 +488,15 @@ CharSetName
 WS  : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;}
     ;
 
-COMMENT
-  : '--' (~('\n'|'\r'))*
-  { $channel=HIDDEN; }
-  ;
+LINE_COMMENT
+  : '--' (~('\n'|'\r'))* { $channel=HIDDEN; }
+  ;
+
+QUERY_HINT
+  : '/*' (options { greedy=false; } : QUERY_HINT|.)* '*/'
+    {
+      if (getText().charAt(2) != '+') { $channel=HIDDEN; }
+      else { setText(getText().substring(3, getText().length() - 2)); }
+    }
+  ;
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 918169a..7e3cd7b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -273,11 +273,6 @@ TOK_TABLEPROPERTY;
 TOK_IFEXISTS;
 TOK_IFNOTEXISTS;
 TOK_ORREPLACE;
-TOK_HINTLIST;
-TOK_HINT;
-TOK_MAPJOIN;
-TOK_STREAMTABLE;
-TOK_HINTARGLIST;
 TOK_USERSCRIPTCOLNAMES;
 TOK_USERSCRIPTCOLSCHEMA;
 TOK_RECORDREADER;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
index 4ab5c47..f9ad07f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseDriver.java
@@ -220,6 +220,33 @@ public ASTNode parse(String command, Context ctx, boolean setTokenRewriteStream)
     return tree;
   }
 
+  /*
+   * Parse a string as a query hint.
+   */
+  public ASTNode parseHint(String command) throws ParseException {
+    LOG.info("Parsing hint: " + command);
+
+    HiveLexerX lexer = new HiveLexerX(new ANTLRNoCaseStringStream(command));
+    TokenRewriteStream tokens = new TokenRewriteStream(lexer);
+    HintParser parser = new HintParser(tokens);
+    parser.setTreeAdaptor(adaptor);
+    HintParser.hint_return r = null;
+    try {
+      r = parser.hint();
+    } catch (RecognitionException e) {
+      throw new ParseException(parser.errors);
+    }
+
+    if (lexer.getErrors().size() == 0 && parser.errors.size() == 0) {
+      LOG.info("Parse Completed");
+    } else if (lexer.getErrors().size() != 0) {
+      throw new ParseException(lexer.getErrors());
+    } else {
+      throw new ParseException(parser.errors);
+    }
+
+    return (ASTNode) r.getTree();
+  }
 
   /*
   * parse a String as a Select List. This allows table functions to be passed expression Strings
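Note: the lexer and the new parser work as a pair. HiveLexer now folds every
/*...*/ span into a single QUERY_HINT token, hiding it unless it starts with
"/*+", and parseHint() above re-parses the surviving token text on demand. A
plain-Java mirror of the lexer action, for illustration only (the method name
is invented here):

    // Mirrors the QUERY_HINT action. The shortest possible token is "/**/",
    // so charAt(2) is always in range.
    static String hintBodyOrNull(String tokenText) {
      if (tokenText.charAt(2) != '+') {
        return null; // ordinary comment; the real rule sends it to the HIDDEN channel
      }
      // "/*+ MAPJOIN(a) */" -> " MAPJOIN(a) "
      return tokenText.substring(3, tokenText.length() - 2);
    }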
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
index 87ff581..ec52741 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/QBSubQuery.java
@@ -536,7 +536,7 @@ boolean subqueryRestrictionsCheck(RowResolver parentQueryRR,
 
     int selectExprStart = 0;
-    if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
+    if ( selectClause.getChild(0).getType() == HiveParser.QUERY_HINT ) {
       selectExprStart = 1;
     }
@@ -672,7 +672,7 @@ void validateAndRewriteAST(RowResolver outerQueryRR,
     ASTNode selectClause = (ASTNode) insertClause.getChild(1);
 
     int selectExprStart = 0;
-    if ( selectClause.getChild(0).getType() == HiveParser.TOK_HINTLIST ) {
+    if ( selectClause.getChild(0).getType() == HiveParser.QUERY_HINT ) {
       selectExprStart = 1;
     }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g
index 2c2e856..ee0741b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SelectClauseParser.g
@@ -49,11 +49,11 @@ selectClause
 @init { gParent.pushMsg("select clause", state); }
 @after { gParent.popMsg(state); }
     :
-    KW_SELECT hintClause? (((KW_ALL | dist=KW_DISTINCT)? selectList)
+    KW_SELECT QUERY_HINT? (((KW_ALL | dist=KW_DISTINCT)? selectList)
                           | (transform=KW_TRANSFORM selectTrfmClause))
-    -> {$transform == null && $dist == null}? ^(TOK_SELECT hintClause? selectList)
-    -> {$transform == null && $dist != null}? ^(TOK_SELECTDI hintClause? selectList)
-    -> ^(TOK_SELECT hintClause? ^(TOK_SELEXPR selectTrfmClause) )
+    -> {$transform == null && $dist == null}? ^(TOK_SELECT QUERY_HINT? selectList)
+    -> {$transform == null && $dist != null}? ^(TOK_SELECTDI QUERY_HINT? selectList)
+    -> ^(TOK_SELECT QUERY_HINT? ^(TOK_SELEXPR selectTrfmClause) )
     |
     trfmClause  ->^(TOK_SELECT ^(TOK_SELEXPR trfmClause))
     ;
@@ -77,49 +77,6 @@ selectTrfmClause
     -> ^(TOK_TRANSFORM selectExpressionList $inSerde $inRec StringLiteral $outSerde $outRec aliasList? columnNameTypeList?)
     ;
 
-hintClause
-@init { gParent.pushMsg("hint clause", state); }
-@after { gParent.popMsg(state); }
-    :
-    DIVIDE STAR PLUS hintList STAR DIVIDE -> ^(TOK_HINTLIST hintList)
-    ;
-
-hintList
-@init { gParent.pushMsg("hint list", state); }
-@after { gParent.popMsg(state); }
-    :
-    hintItem (COMMA hintItem)* -> hintItem+
-    ;
-
-hintItem
-@init { gParent.pushMsg("hint item", state); }
-@after { gParent.popMsg(state); }
-    :
-    hintName (LPAREN hintArgs RPAREN)? -> ^(TOK_HINT hintName hintArgs?)
-    ;
-
-hintName
-@init { gParent.pushMsg("hint name", state); }
-@after { gParent.popMsg(state); }
-    :
-    KW_MAPJOIN -> TOK_MAPJOIN
-    | KW_STREAMTABLE -> TOK_STREAMTABLE
-    ;
-
-hintArgs
-@init { gParent.pushMsg("hint arguments", state); }
-@after { gParent.popMsg(state); }
-    :
-    hintArgName (COMMA hintArgName)* -> ^(TOK_HINTARGLIST hintArgName+)
-    ;
-
-hintArgName
-@init { gParent.pushMsg("hint argument name", state); }
-@after { gParent.popMsg(state); }
-    :
-    identifier
-    ;
-
 selectItem
 @init { gParent.pushMsg("selection target", state); }
 @after { gParent.popMsg(state); }
@@ -220,4 +177,3 @@ window_frame_boundary
   KW_CURRENT KW_ROW -> ^(KW_CURRENT)
   | Number (d=KW_PRECEDING | d=KW_FOLLOWING ) -> ^($d Number)
   ;
-
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 6c0f300..81cc856 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1436,9 +1436,19 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1, PlannerContext plan
       qbp.setSelExprForClause(ctx_1.dest, ast);
 
       int posn = 0;
-      if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.TOK_HINTLIST) {
-        qbp.setHints((ASTNode) ast.getChild(0));
-        posn++;
+      if (((ASTNode) ast.getChild(0)).getToken().getType() == HiveParser.QUERY_HINT) {
+        ParseDriver pd = new ParseDriver();
+        String queryHintStr = ast.getChild(0).getText();
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("QUERY HINT: " + queryHintStr);
+        }
+        try {
+          ASTNode hintNode = pd.parseHint(queryHintStr);
+          qbp.setHints(hintNode);
+          posn++;
+        } catch (ParseException e) {
+          throw new SemanticException("Failed to parse query hint: " + e.getMessage(), e);
+        }
       }
 
       if ((ast.getChild(posn).getChild(0).getType() == HiveParser.TOK_TRANSFORM))
@@ -3936,7 +3946,7 @@ public static int setBit(int bitmap, int bitIdx) {
         : selectExprs.getChildCount());
     if (selectExprs != null) {
       for (int i = 0; i < selectExprs.getChildCount(); ++i) {
-        if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.TOK_HINTLIST) {
+        if (((ASTNode) selectExprs.getChild(i)).getToken().getType() == HiveParser.QUERY_HINT) {
          continue;
         }
         // table.column AS alias
@@ -4084,7 +4094,7 @@ static boolean isRegex(String pattern, HiveConf conf) {
     // SELECT * or SELECT TRANSFORM(*)
     boolean selectStar = false;
     int posn = 0;
-    boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.TOK_HINTLIST);
+    boolean hintPresent = (selExprList.getChild(0).getType() == HiveParser.QUERY_HINT);
     if (hintPresent) {
       posn++;
     }
@@ -8417,7 +8427,7 @@ private void pushJoinFilters(QB qb, QBJoinTree joinTree,
       ASTNode hints = qb.getParseInfo().getHints();
       for (int pos = 0; pos < hints.getChildCount(); pos++) {
         ASTNode hint = (ASTNode) hints.getChild(pos);
-        if (((ASTNode) hint.getChild(0)).getToken().getType() == HiveParser.TOK_MAPJOIN) {
+        if (((ASTNode) hint.getChild(0)).getToken().getType() == HintParser.TOK_MAPJOIN) {
           // the user has specified to ignore mapjoin hint
           if (!conf.getBoolVar(HiveConf.ConfVars.HIVEIGNOREMAPJOINHINT)
               && !conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez")) {
@@ -8878,7 +8888,7 @@ private void parseStreamTables(QBJoinTree joinTree, QB qb) {
 
     for (Node hintNode : qb.getParseInfo().getHints().getChildren()) {
       ASTNode hint = (ASTNode) hintNode;
-      if (hint.getChild(0).getType() == HiveParser.TOK_STREAMTABLE) {
+      if (hint.getChild(0).getType() == HintParser.TOK_STREAMTABLE) {
        for (int i = 0; i < hint.getChild(1).getChildCount(); i++) {
          if (streamAliases == null) {
            streamAliases = new ArrayList();
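Note: pushJoinFilters and parseStreamTables now walk a tree produced by
HintParser, so the hint token constants must come from HintParser rather than
HiveParser, as the last two hunks show. A condensed sketch of that traversal
(variable names follow the surrounding code; not a new API):

    // "hints" is the TOK_HINTLIST node stored earlier via qbp.setHints().
    ASTNode hints = qb.getParseInfo().getHints();
    for (int pos = 0; pos < hints.getChildCount(); pos++) {
      ASTNode hint = (ASTNode) hints.getChild(pos);
      if (((ASTNode) hint.getChild(0)).getToken().getType() == HintParser.TOK_MAPJOIN) {
        ASTNode args = (ASTNode) hint.getChild(1); // TOK_HINTARGLIST
        // args.getChild(i).getText() is one hinted alias, e.g. "a"
      }
    }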
diff --git ql/src/test/queries/clientpositive/comments.q ql/src/test/queries/clientpositive/comments.q
new file mode 100644
index 0000000..cacb689
--- /dev/null
+++ ql/src/test/queries/clientpositive/comments.q
@@ -0,0 +1,21 @@
+-- COMMENT
+select key from src limit 1;
+
+/* comment comment */
+select key from src limit 1;
+
+select /*comment*/ key from src limit 1;
+
+select /*comment*/ key from /* comment */ src /* comment */ limit 1;
+
+select /**/ key /* */ from src limit 1;
+
+/*
+
+*/
+select /*
+*/ key from src limit 1;
+
+select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0;
+
+explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0;
diff --git ql/src/test/results/clientpositive/comments.q.out ql/src/test/results/clientpositive/comments.q.out
new file mode 100644
index 0000000..4395cb2
--- /dev/null
+++ ql/src/test/results/clientpositive/comments.q.out
@@ -0,0 +1,225 @@
+PREHOOK: query: select key from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select key from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+PREHOOK: query: /* comment comment */
+select key from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: /* comment comment */
+select key from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+PREHOOK: query: select /*comment*/ key from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select /*comment*/ key from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+PREHOOK: query: select /*comment*/ key from /* comment */ src /* comment */ limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select /*comment*/ key from /* comment */ src /* comment */ limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+PREHOOK: query: select /**/ key /* */ from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select /**/ key /* */ from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+PREHOOK: query: /*
+
+*/
+select /*
+*/ key from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: /*
+
+*/
+select /*
+*/ key from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+238
+PREHOOK: query: select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+1019
+PREHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select /*+ MAPJOIN(a) */ count(*) from src a join src b on a.key = b.key where a.key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-3 is a root stage
+  Stage-1 depends on stages: Stage-3
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-3
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a 
+          TableScan
+            alias: a
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (key > 0) (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              HashTable Sink Operator
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                Position of Big Table: 1
+
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: b
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate: (key > 0) (type: boolean)
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 key (type: string)
+                  1 key (type: string)
+                Position of Big Table: 1
+                Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    null sort order: 
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    value expressions: _col0 (type: bigint)
+                    auto parallelism: false
+      Local Work:
+        Map Reduce Local Work
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+              bucket_count -1
+              column.name.delimiter ,
+              columns key,value
+              columns.comments 'default','default'
+              columns.types string:string
+#### A masked pattern was here ####
+              name default.src
+              numFiles 1
+              numRows 500
+              rawDataSize 5312
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              totalSize 5812
+#### A masked pattern was here ####
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+          
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                bucket_count -1
+                column.name.delimiter ,
+                columns key,value
+                columns.comments 'default','default'
+                columns.types string:string
+#### A masked pattern was here ####
+                name default.src
+                numFiles 1
+                numRows 500
+                rawDataSize 5312
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 5812
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.src
+            name: default.src
+      Truncated Path -> Alias:
+        /src [b]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0
+                  columns.types bigint
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink