commit fb739c9ab7325c017f1ec937d8f1031f94b0e02b Author: Andrew Sherman Date: Tue Jun 20 15:27:38 2017 -0700 HIVE-16935: Hive should strip comments from input before choosing which CommandProcessor to run. We strip sql comments from a command string. The stripped command is use to determine which CommandProcessor will execute the command. If the CommandProcessorFactory does not select a special CommandProcessor then we execute the original unstripped command so that the sql parser can remove comments. Move BeeLine's comment stripping code to HiveStringUtils and change BeeLine to call it from there Add a better test with separate tokens for "set role" in TestCommandProcessorFactory. Add a test case for comment removal in set_processor_namespaces.q using an indented comment as unindented comments are removed by the test driver. Change-Id: I166dc1e7588ec9802ba373d88e69e716aecd33c2 diff --git beeline/src/java/org/apache/hive/beeline/Commands.java beeline/src/java/org/apache/hive/beeline/Commands.java index 3b2d72ed79771e6198e62c47060a7f80665dbcb2..7897e224c2f56f8471b7d8a499769179f59efbf7 100644 --- beeline/src/java/org/apache/hive/beeline/Commands.java +++ beeline/src/java/org/apache/hive/beeline/Commands.java @@ -60,10 +60,10 @@ import org.apache.hadoop.hive.conf.VariableSubstitution; import org.apache.hadoop.io.IOUtils; import org.apache.hive.beeline.logs.BeelineInPlaceUpdateStream; +import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.jdbc.HiveStatement; import org.apache.hive.jdbc.Utils; import org.apache.hive.jdbc.Utils.JdbcConnectionParams; -import com.google.common.annotations.VisibleForTesting; import org.apache.hive.jdbc.logs.InPlaceUpdateStream; public class Commands { @@ -1068,39 +1068,13 @@ private boolean executeInternal(String sql, boolean call) { return true; } - //startQuote use array type in order to pass int type as input/output parameter. - //This method remove comment from current line of a query. - //It does not remove comment like strings inside quotes. - @VisibleForTesting - String removeComments(String line, int[] startQuote) { - if (line == null || line.isEmpty()) return line; - if (startQuote[0] == -1 && beeLine.isComment(line)) return ""; //assume # can only be used at the beginning of line. - StringBuilder builder = new StringBuilder(); - for (int index = 0; index < line.length(); index++) { - if (startQuote[0] == -1 && index < line.length() - 1 && line.charAt(index) == '-' && line.charAt(index + 1) =='-') { - return builder.toString().trim(); - } - - char letter = line.charAt(index); - if (startQuote[0] == letter && (index == 0 || line.charAt(index -1) != '\\') ) { - startQuote[0] = -1; // Turn escape off. - } else if (startQuote[0] == -1 && (letter == '\'' || letter == '"') && (index == 0 || line.charAt(index -1) != '\\')) { - startQuote[0] = letter; // Turn escape on. - } - - builder.append(letter); - } - - return builder.toString().trim(); - } - /* * Check if the input line is a multi-line command which needs to read further */ public String handleMultiLineCmd(String line) throws IOException { //When using -e, console reader is not initialized and command is always a single line int[] startQuote = {-1}; - line = removeComments(line,startQuote); + line = HiveStringUtils.removeComments(line, startQuote); while (isMultiLine(line) && beeLine.getOpts().isAllowMultiLineCommand()) { StringBuilder prompt = new StringBuilder(beeLine.getPrompt()); if (!beeLine.getOpts().isSilent()) { @@ -1125,7 +1099,7 @@ public String handleMultiLineCmd(String line) throws IOException { if (extra == null) { //it happens when using -f and the line of cmds does not end with ; break; } - extra = removeComments(extra,startQuote); + extra = HiveStringUtils.removeComments(extra, startQuote); if (extra != null && !extra.isEmpty()) { line += "\n" + extra; } diff --git beeline/src/test/org/apache/hive/beeline/TestCommands.java beeline/src/test/org/apache/hive/beeline/TestCommands.java index 04c939a04c7a56768286743c2bb9c9797507e3aa..80c01ff8d109a58b57192fd73b3b3cb92e5bd36f 100644 --- beeline/src/test/org/apache/hive/beeline/TestCommands.java +++ beeline/src/test/org/apache/hive/beeline/TestCommands.java @@ -20,29 +20,28 @@ import org.junit.Test; +import static org.apache.hive.common.util.HiveStringUtils.removeComments; import static org.junit.Assert.assertEquals; public class TestCommands { @Test public void testLinesEndingWithComments() { - BeeLine beeline = new BeeLine(); - Commands commands = new Commands(beeline); int[] escape = {-1}; - assertEquals("show tables;", commands.removeComments("show tables;",escape)); - assertEquals("show tables;", commands.removeComments("show tables; --comments",escape)); - assertEquals("show tables;", commands.removeComments("show tables; -------comments",escape)); - assertEquals("show tables;", commands.removeComments("show tables; -------comments;one;two;three;;;;",escape)); - assertEquals("show", commands.removeComments("show-- tables; -------comments",escape)); - assertEquals("show", commands.removeComments("show --tables; -------comments",escape)); - assertEquals("s", commands.removeComments("s--how --tables; -------comments",escape)); - assertEquals("", commands.removeComments("-- show tables; -------comments",escape)); + assertEquals("show tables;", removeComments("show tables;",escape)); + assertEquals("show tables;", removeComments("show tables; --comments",escape)); + assertEquals("show tables;", removeComments("show tables; -------comments",escape)); + assertEquals("show tables;", removeComments("show tables; -------comments;one;two;three;;;;",escape)); + assertEquals("show", removeComments("show-- tables; -------comments",escape)); + assertEquals("show", removeComments("show --tables; -------comments",escape)); + assertEquals("s", removeComments("s--how --tables; -------comments",escape)); + assertEquals("", removeComments("-- show tables; -------comments",escape)); - assertEquals("\"show tables\"", commands.removeComments("\"show tables\" --comments",escape)); - assertEquals("\"show --comments tables\"", commands.removeComments("\"show --comments tables\" --comments",escape)); - assertEquals("\"'show --comments' tables\"", commands.removeComments("\"'show --comments' tables\" --comments",escape)); - assertEquals("'show --comments tables'", commands.removeComments("'show --comments tables' --comments",escape)); - assertEquals("'\"show --comments tables\"'", commands.removeComments("'\"show --comments tables\"' --comments",escape)); + assertEquals("\"show tables\"", removeComments("\"show tables\" --comments",escape)); + assertEquals("\"show --comments tables\"", removeComments("\"show --comments tables\" --comments",escape)); + assertEquals("\"'show --comments' tables\"", removeComments("\"'show --comments' tables\" --comments",escape)); + assertEquals("'show --comments tables'", removeComments("'show --comments tables' --comments",escape)); + assertEquals("'\"show --comments tables\"'", removeComments("'\"show --comments tables\"' --comments",escape)); } } diff --git cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 27fd66d35ea89b0de0d17763625fbf564584fcca..cf065821eef91fec3f5a7004f478a937d9209310 100644 --- cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -79,6 +79,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.io.IOUtils; +import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.common.util.ShutdownHookManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,7 +123,7 @@ public int processCmd(String cmd) { // Flush the print stream, so it doesn't include output from the last command ss.err.flush(); - String cmd_trimmed = cmd.trim(); + String cmd_trimmed = HiveStringUtils.removeComments(cmd).trim(); String[] tokens = tokenizeCmd(cmd_trimmed); int ret = 0; @@ -181,7 +182,12 @@ public int processCmd(String cmd) { } else { // local mode try { CommandProcessor proc = CommandProcessorFactory.get(tokens, (HiveConf) conf); - ret = processLocalCmd(cmd, proc, ss); + if (proc instanceof Driver) { + // Let Driver strip comments using sql parser + ret = processLocalCmd(cmd, proc, ss); + } else { + ret = processLocalCmd(cmd_trimmed, proc, ss); + } } catch (SQLException e) { console.printError("Failed processing command " + tokens[0] + " " + e.getLocalizedMessage(), org.apache.hadoop.util.StringUtils.stringifyException(e)); diff --git common/src/java/org/apache/hive/common/util/HiveStringUtils.java common/src/java/org/apache/hive/common/util/HiveStringUtils.java index 4a6413a7c376ffb4de6d20d24707ac5bf89ebc0c..975cd5ac2aabd1e2b9ddd68f5354b043ecffda8f 100644 --- common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -1073,4 +1073,80 @@ public static String getPartitionValWithInvalidCharacter(List partVals, return null; } + + /** + * Strip comments from a sql statement, tracking when the statement contains a string literal. + * + * @param statement the input string + * @return a stripped statement + */ + public static String removeComments(String statement) { + if (statement == null) { + return null; + } + String[] split = statement.split("\n"); + int[] startQuote = {-1}; + StringBuilder ret = new StringBuilder(statement.length()); + for (int i = 0; i < split.length; i++) { + String lineWithComments = split[i]; + String lineNoComments = removeComments(lineWithComments, startQuote); + ret.append(lineNoComments); + if (i != split.length - 1 && !lineNoComments.isEmpty()) { + ret.append("\n"); + } + } + return ret.toString(); + } + + /** + * Remove comments from the current line of a query. + * Avoid removing comment-like strings inside quotes. + * @param line a line of sql text + * @param startQuote The value -1 indicates that line does not begin inside a string literal. + * Other values indicate that line does begin inside a string literal + * and the value passed is the delimiter character. + * The array type is used to pass int type as input/output parameter. + * @return the line with comments removed. + */ + public static String removeComments(String line, int[] startQuote) { + if (line == null || line.isEmpty()) { + return line; + } + if (startQuote[0] == -1 && isComment(line)) { + return ""; //assume # can only be used at the beginning of line. + } + StringBuilder builder = new StringBuilder(); + for (int index = 0; index < line.length(); index++) { + if (startQuote[0] == -1 && index < line.length() - 1 && line.charAt(index) == '-' + && line.charAt(index + 1) == '-') { + return builder.toString().trim(); + } + + char letter = line.charAt(index); + if (startQuote[0] == letter && (index == 0 || line.charAt(index - 1) != '\\')) { + startQuote[0] = -1; // Turn escape off. + } else if (startQuote[0] == -1 && (letter == '\'' || letter == '"') && (index == 0 + || line.charAt(index - 1) != '\\')) { + startQuote[0] = letter; // Turn escape on. + } + + builder.append(letter); + } + + return builder.toString(); + } + + /** + * Test whether a line is a comment. + * + * @param line the line to be tested + * @return true if a comment + */ + private static boolean isComment(String line) { + // SQL92 comment prefix is "--" + // beeline also supports shell-style "#" prefix + String lineTrimmed = line.trim(); + return lineTrimmed.startsWith("#") || lineTrimmed.startsWith("--"); + } + } diff --git common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java index 6bd7037152c6f809daec8af42708693c05fe00cf..3ba13aa2a60a1aedb584a6831ddc8a0dd9de3ea8 100644 --- common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java +++ common/src/test/org/apache/hive/common/util/TestHiveStringUtils.java @@ -18,6 +18,7 @@ package org.apache.hive.common.util; +import static org.apache.hive.common.util.HiveStringUtils.removeComments; import static org.junit.Assert.*; import java.util.Arrays; @@ -61,4 +62,52 @@ public void splitAndUnEscapeTestCase(String testValue, String[] expectedResults) assertTrue(Arrays.toString(expectedResults) + " == " + Arrays.toString(testResults), Arrays.equals(expectedResults, testResults)); } + + @Test + public void testStripComments() throws Exception { + assertNull(removeComments(null)); + assertUnchanged("foo"); + assertUnchanged("select 1"); + assertUnchanged("insert into foo (values('-----')"); + assertUnchanged("insert into foo (values('abc\n\'xyz')"); + assertUnchanged("create database if not exists testDB; set hive.cli.print.current.db=true;use\ntestDB;\nuse default;drop if exists testDB;"); + + assertEquals("foo", removeComments("foo\n")); + assertEquals("foo", removeComments("\nfoo")); + assertEquals("insert into foo (values('-----')", removeComments("--comment\ninsert into foo (values('-----')")); + assertEquals("insert into foo (values('----''-')", removeComments("--comment\ninsert into foo (values('----''-')")); + assertEquals("insert into foo (values(\"----''-\")", removeComments("--comment\ninsert into foo (values(\"----''-\")")); + assertEquals("insert into foo (values(\"----\"\"-\")", removeComments("--comment\ninsert into foo (values(\"----\"\"-\")")); + assertEquals("insert into foo (values('-\n--\n--')", removeComments("--comment\ninsert into foo (values('-\n--\n--')")); + assertEquals("insert into foo (values('-\n--\n--')", removeComments("--comment\n\ninsert into foo (values('-\n--\n--')")); + assertEquals("insert into foo (values(\"-\n--\n--\")", removeComments("--comment\n\ninsert into foo (values(\"-\n--\n--\")")); + assertEquals("insert into foo (values('abc');\ninsert into foo (values('def');", removeComments( "insert into foo (values('abc');\n--comment\ninsert into foo (values('def');")); + } + + @Test + public void testLinesEndingWithComments() { + int[] escape = {-1}; + assertEquals("show tables;", removeComments("show tables;",escape)); + assertEquals("show tables;", removeComments("show tables; --comments",escape)); + assertEquals("show tables;", removeComments("show tables; -------comments",escape)); + assertEquals("show tables;", removeComments("show tables; -------comments;one;two;three;;;;",escape)); + assertEquals("show", removeComments("show-- tables; -------comments",escape)); + assertEquals("show", removeComments("show --tables; -------comments",escape)); + assertEquals("s", removeComments("s--how --tables; -------comments",escape)); + assertEquals("", removeComments("-- show tables; -------comments",escape)); + + assertEquals("\"show tables\"", removeComments("\"show tables\" --comments",escape)); + assertEquals("\"show --comments tables\"", removeComments("\"show --comments tables\" --comments",escape)); + assertEquals("\"'show --comments' tables\"", removeComments("\"'show --comments' tables\" --comments",escape)); + assertEquals("'show --comments tables'", removeComments("'show --comments tables' --comments",escape)); + assertEquals("'\"show --comments tables\"'", removeComments("'\"show --comments tables\"' --comments",escape)); + } + + /** + * check that statement is unchanged after stripping + */ + private void assertUnchanged(String statement) { + assertEquals("statement should not have been affected by stripping commnents", statement, + removeComments(statement)); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java index 21bdcf44436a02b11f878fa439e916d4b55ac63d..50eaf18f97372be65ff74c08aba073e29c3e0306 100644 --- ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java +++ ql/src/test/org/apache/hadoop/hive/ql/processors/TestCommandProcessorFactory.java @@ -46,6 +46,8 @@ public void testInvalidCommands() throws Exception { CommandProcessorFactory.getForHiveCommand(new String[]{" "}, conf)); Assert.assertNull("Set role should have returned null", CommandProcessorFactory.getForHiveCommand(new String[]{"set role"}, conf)); + Assert.assertNull("Set role should have returned null", + CommandProcessorFactory.getForHiveCommand(new String[]{"set", "role"}, conf)); Assert.assertNull("SQL should have returned null", CommandProcessorFactory.getForHiveCommand(new String[]{"SELECT * FROM TABLE"}, conf)); Assert.assertNull("Test only command should have returned null", diff --git ql/src/test/queries/clientpositive/set_processor_namespaces.q ql/src/test/queries/clientpositive/set_processor_namespaces.q index 612807f0c871b1881446d088e1c2c399d1afe970..1f9e282c3b1b6d3acfd487b4a85c92aab72c9cc7 100644 --- ql/src/test/queries/clientpositive/set_processor_namespaces.q +++ ql/src/test/queries/clientpositive/set_processor_namespaces.q @@ -30,3 +30,11 @@ set jar=${system:maven.local.repository}/org/apache/derby/derby/${system:derby.v add file ${hiveconf:jar}; delete file ${hiveconf:jar}; list file; + + +-- comment (will be removed by test driver) +set x=1; +set x; + -- an indented comment to test comment removal +set x=2; +set x; \ No newline at end of file diff --git ql/src/test/results/clientpositive/set_processor_namespaces.q.out ql/src/test/results/clientpositive/set_processor_namespaces.q.out index c05ce4d61d00a9ee6671d97f2fd178f18d44cc8c..c1c82704e913a3166d8d87bcd9833b805a995a1f 100644 --- ql/src/test/results/clientpositive/set_processor_namespaces.q.out +++ ql/src/test/results/clientpositive/set_processor_namespaces.q.out @@ -51,3 +51,5 @@ POSTHOOK: Input: default@src 5 val_5 5 val_5 c=1 +x=1 +x=2 diff --git service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java index 2dd90b69b3bf789b1a3928129cf801b17884033f..52c320b5589691bf6c9963bd573e32e0b51ac13f 100644 --- service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java +++ service/src/java/org/apache/hive/service/cli/operation/ExecuteStatementOperation.java @@ -22,6 +22,7 @@ import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorFactory; +import org.apache.hive.common.util.HiveStringUtils; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.cli.OperationType; import org.apache.hive.service.cli.session.HiveSession; @@ -42,7 +43,9 @@ public String getStatement() { public static ExecuteStatementOperation newExecuteStatementOperation(HiveSession parentSession, String statement, Map confOverlay, boolean runAsync, long queryTimeout) throws HiveSQLException { - String[] tokens = statement.trim().split("\\s+"); + + String cleanStatement = HiveStringUtils.removeComments(statement); + String[] tokens = cleanStatement.trim().split("\\s+"); CommandProcessor processor = null; try { processor = CommandProcessorFactory.getForHiveCommand(tokens, parentSession.getHiveConf()); @@ -51,8 +54,9 @@ public static ExecuteStatementOperation newExecuteStatementOperation(HiveSession } if (processor == null) { // runAsync, queryTimeout makes sense only for a SQLOperation + // Pass the original statement to SQLOperation as sql parser can remove comments by itself return new SQLOperation(parentSession, statement, confOverlay, runAsync, queryTimeout); } - return new HiveCommandOperation(parentSession, statement, processor, confOverlay); + return new HiveCommandOperation(parentSession, cleanStatement, processor, confOverlay); } }