diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index d1609e1186..c65ad45e6d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -45,6 +45,7 @@
 import org.antlr.runtime.ClassicToken;
 import org.antlr.runtime.CommonToken;
+import org.antlr.runtime.Token;
 import org.antlr.runtime.TokenRewriteStream;
 import org.antlr.runtime.tree.Tree;
 import org.antlr.runtime.tree.TreeVisitor;
@@ -14140,13 +14141,41 @@ public void setLoadFileWork(List<LoadFileDesc> loadFileWork) {
     this.loadFileWork = loadFileWork;
   }
 
+  private String getQueryStringFromAst(ASTNode ast) {
+    StringBuilder sb = new StringBuilder();
+    int startIdx = ast.getTokenStartIndex();
+    int endIdx = ast.getTokenStopIndex();
+
+    boolean queryNeedsQuotes = true;
+    if (conf.getVar(ConfVars.HIVE_QUOTEDID_SUPPORT).equals("none")) {
+      queryNeedsQuotes = false;
+    }
+
+    for (int idx = startIdx; idx <= endIdx; idx++) {
+      Token curTok = ctx.getTokenRewriteStream().get(idx);
+      if (curTok.getType() == Token.EOF) {
+        continue;
+      } else if (queryNeedsQuotes && curTok.getType() == HiveLexer.Identifier) {
+        // The token stream does not distinguish plain identifiers from quoted identifiers.
+        // The simple (if ugly) solution is to surround every identifier with backquotes.
+        sb.append('`');
+        // Re-escape any backtick (`) characters in the identifier.
+        sb.append(curTok.getText().replaceAll("`", "``"));
+        sb.append('`');
+      } else {
+        sb.append(curTok.getText());
+      }
+    }
+    return sb.toString();
+  }
+
   /**
    * Generate the query string for this query (with fully resolved table references).
    * @return The query string with resolved references. NULL if an error occurred.
    */
   private String getQueryStringForCache(ASTNode ast) {
     // Use the UnparseTranslator to resolve unqualified table names.
-    String queryString = ctx.getTokenRewriteStream().toString(ast.getTokenStartIndex(), ast.getTokenStopIndex());
+    String queryString = getQueryStringFromAst(ast);
 
     // Re-using the TokenRewriteStream map for views so we do not overwrite the current TokenRewriteStream
     String rewriteStreamName = "__qualified_query_string__";
diff --git a/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q
new file mode 100644
index 0000000000..4802f43ba9
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/results_cache_quoted_identifiers.q
@@ -0,0 +1,21 @@
+
+create table quoted1 (
+  `_c1` int,
+  `int` int,
+  `col 3` string,
+  `col``4` string
+) stored as textfile;
+
+insert into quoted1 select key, key, value, value from src;
+
+set hive.query.results.cache.enabled=true;
+
+explain
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1;
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1;
+
+set test.comment="Cache should be used for this query";
+set test.comment;
+explain
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1;
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1;
diff --git a/ql/src/test/results/clientpositive/results_cache_quoted_identifiers.q.out b/ql/src/test/results/clientpositive/results_cache_quoted_identifiers.q.out
new file mode 100644
index 0000000000..4d982a7c6b
--- /dev/null
+++ b/ql/src/test/results/clientpositive/results_cache_quoted_identifiers.q.out
@@ -0,0 +1,116 @@
+PREHOOK: query: create table quoted1 (
+  `_c1` int,
+  `int` int,
+  `col 3` string,
+  `col``4` string
+) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@quoted1
+POSTHOOK: query: create table quoted1 (
+  `_c1` int,
+  `int` int,
+  `col 3` string,
+  `col``4` string
+) stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@quoted1
+PREHOOK: query: insert into quoted1 select key, key, value, value from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@quoted1
+POSTHOOK: query: insert into quoted1 select key, key, value, value from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@quoted1
+POSTHOOK: Lineage: quoted1._c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: quoted1.col 3 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: quoted1.col`4 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: quoted1.int EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: explain
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: quoted1
+            Statistics: Num rows: 500 Data size: 11124 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: _c1 (type: int), int (type: int), col 3 (type: string), col`4 (type: string)
+              outputColumnNames: _c1, int, col 3, col`4
+              Statistics: Num rows: 500 Data size: 11124 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(_c1), max(int), max(col 3), max(col`4)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: string)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), max(VALUE._col1), max(VALUE._col2), max(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@quoted1
+#### A masked pattern was here ####
+POSTHOOK: query: select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@quoted1
+#### A masked pattern was here ####
+498	498	val_98	val_98
+test.comment="Cache should be used for this query"
+PREHOOK: query: explain
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+      Cached Query Result: true
+
+PREHOOK: query: select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@quoted1
+#### A masked pattern was here ####
+POSTHOOK: query: select max(`_c1`), max(`int`), max(`col 3`), max(`col``4`) from quoted1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@quoted1
+#### A masked pattern was here ####
+498	498	val_98	val_98
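
Note (not part of the patch): below is a minimal standalone sketch of the identifier re-quoting rule that getQueryStringFromAst applies when it rebuilds the query string used as the results-cache key. The class and method names (QuotedIdentifierEscapeDemo, quoteIdentifier) are hypothetical; only the escaping expression mirrors the patch, and it only applies when the ConfVars.HIVE_QUOTEDID_SUPPORT check is not "none".

// Hypothetical demo class; only the escaping rule below mirrors the patch.
public class QuotedIdentifierEscapeDemo {

  // Same transformation as in getQueryStringFromAst: double any backtick
  // inside the identifier, then wrap the whole identifier in backticks.
  static String quoteIdentifier(String ident) {
    return "`" + ident.replaceAll("`", "``") + "`";
  }

  public static void main(String[] args) {
    // Column names from the quoted1 test table above.
    String[] columns = {"_c1", "int", "col 3", "col`4"};
    for (String col : columns) {
      System.out.println(col + " -> " + quoteIdentifier(col));
    }
    // Prints:
    //   _c1 -> `_c1`
    //   int -> `int`
    //   col 3 -> `col 3`
    //   col`4 -> `col``4`
  }
}

With this rule the rebuilt query string always carries the column literally named col`4 as `col``4`, so the cache key for the second run of the test query matches the first and the q.out plan shows "Cached Query Result: true" instead of a full Map Reduce stage.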