From 6c9db479963b74685c2c68779b4b5cd298d5dfd7 Mon Sep 17 00:00:00 2001 From: Zoltan Matyus Date: Thu, 23 Jan 2020 15:51:37 +0100 Subject: [PATCH] HIVE-22767: beeline doesn't parse semicolons in comments properly --- .../org/apache/hive/beeline/Commands.java | 124 ++++++++------ .../org/apache/hive/beeline/TestCommands.java | 159 +++++++++++++++++- 2 files changed, 231 insertions(+), 52 deletions(-) diff --git a/beeline/src/java/org/apache/hive/beeline/Commands.java b/beeline/src/java/org/apache/hive/beeline/Commands.java index 8f47323700..338b105a2d 100644 --- a/beeline/src/java/org/apache/hive/beeline/Commands.java +++ b/beeline/src/java/org/apache/hive/beeline/Commands.java @@ -52,6 +52,8 @@ import java.util.Properties; import java.util.Set; import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.hadoop.hive.common.cli.ShellCmdExecutor; import org.apache.hadoop.hive.conf.HiveConf; @@ -1220,71 +1222,93 @@ private boolean execute(String line, boolean call, boolean entireLineAsCommand) return true; } + private enum SectionType { + SINGLE_QUOTED, DOUBLE_QUOTED, LINE_COMMENT, BLOCK_COMMENT + } + /** * Helper method to parse input from Beeline and convert it to a {@link List} of commands that * can be executed. This method contains logic for handling delimiters that are placed within * quotations. It iterates through each character in the line and checks to see if it is the delimiter, ', * or " */ - private List getCmdList(String line, boolean entireLineAsCommand) { - List cmdList = new ArrayList(); + List getCmdList(String line, boolean entireLineAsCommand) { if (entireLineAsCommand) { - cmdList.add(line); - } else { - StringBuilder command = new StringBuilder(); - - // Marker to track if there is starting double quote without an ending double quote - boolean hasUnterminatedDoubleQuote = false; + return Stream.of(line).collect(Collectors.toList()); + } + List cmdList = new ArrayList(); + StringBuilder command = new StringBuilder(); - // Marker to track if there is starting single quote without an ending double quote - boolean hasUnterminatedSingleQuote = false; + // Marker to track if there is a special section open + SectionType sectionType = null; - // Index of the last seen delimiter in the given line - int lastDelimiterIndex = 0; + // Index of the last seen delimiter in the given line + int lastDelimiterIndex = 0; - // Marker to track if the previous character was an escape character - boolean wasPrevEscape = false; + // Marker to track if the previous character was an escape character + boolean wasPrevEscape = false; - int index = 0; + int index = 0; - // Iterate through the line and invoke the addCmdPart method whenever the delimiter is seen that is not inside a - // quoted string - for (; index < line.length();) { - if (line.startsWith("\'", index)) { - // If a single quote is seen and the index is not inside a double quoted string and the previous character - // was not an escape, then update the hasUnterminatedSingleQuote flag - if (!hasUnterminatedDoubleQuote && !wasPrevEscape) { - hasUnterminatedSingleQuote = !hasUnterminatedSingleQuote; - } - wasPrevEscape = false; - index++; - } else if (line.startsWith("\"", index)) { - // If a double quote is seen and the index is not inside a single quoted string and the previous character - // was not an escape, then update the hasUnterminatedDoubleQuote flag - if (!hasUnterminatedSingleQuote && !wasPrevEscape) { - hasUnterminatedDoubleQuote = !hasUnterminatedDoubleQuote; - } - wasPrevEscape = false; - index++; - } else if (line.startsWith(beeLine.getOpts().getDelimiter(), index)) { - // If the delimiter is seen, and the line isn't inside a quoted string, then treat - // line[lastDelimiterIndex] to line[index] as a single command - if (!hasUnterminatedDoubleQuote && !hasUnterminatedSingleQuote) { - addCmdPart(cmdList, command, line.substring(lastDelimiterIndex, index)); - lastDelimiterIndex = index + beeLine.getOpts().getDelimiter().length(); - } - wasPrevEscape = false; - index += beeLine.getOpts().getDelimiter().length(); - } else { - wasPrevEscape = line.startsWith("\\", index) && !wasPrevEscape; - index++; - } - } - // If the line doesn't end with the delimiter or if the line is empty, add the cmd part - if (lastDelimiterIndex != index || line.length() == 0) { + // Iterate through the line and invoke the addCmdPart method whenever the delimiter is seen that is not inside a + // quoted string + for (; index < line.length();) { + if (!wasPrevEscape && sectionType == null && line.startsWith("'", index)) { + // Opening non-escaped single quote + sectionType = SectionType.SINGLE_QUOTED; + index++; + } else if (!wasPrevEscape && sectionType == SectionType.SINGLE_QUOTED && line.startsWith("'", index)) { + // Closing non-escaped single quote + sectionType = null; + index++; + } else if (!wasPrevEscape && sectionType == null && line.startsWith("\"", index)) { + // Opening non-escaped double quote + sectionType = SectionType.DOUBLE_QUOTED; + index++; + } else if (!wasPrevEscape && sectionType == SectionType.DOUBLE_QUOTED && line.startsWith("\"", index)) { + // Closing non-escaped double quote + sectionType = null; + index++; + } else if (sectionType == null && line.startsWith("--", index)) { + // Opening line comment with (non-escapable?) double-dash + sectionType = SectionType.LINE_COMMENT; + wasPrevEscape = false; + index += 2; + } else if (sectionType == SectionType.LINE_COMMENT && line.startsWith("\n", index)) { + // Closing line comment with (non-escapable?) newline + sectionType = null; + wasPrevEscape = false; + index++; + } else if (sectionType == null && line.startsWith("/*", index)) { + // Opening block comment with (non-escapable?) /* + sectionType = SectionType.BLOCK_COMMENT; + wasPrevEscape = false; + index += 2; + } else if (sectionType == SectionType.BLOCK_COMMENT && line.startsWith("*/", index)) { + // Closing line comment with (non-escapable?) newline + sectionType = null; + wasPrevEscape = false; + index += 2; + } else if (line.startsWith("\\", index)) { + // Escape character seen (anywhere) + wasPrevEscape = !wasPrevEscape; + index++; + } else if (sectionType == null && line.startsWith(beeLine.getOpts().getDelimiter(), index)) { + // If the delimiter is seen, and the line isn't inside a section, then treat + // line[lastDelimiterIndex] to line[index] as a single command addCmdPart(cmdList, command, line.substring(lastDelimiterIndex, index)); + index += beeLine.getOpts().getDelimiter().length(); + lastDelimiterIndex = index; + wasPrevEscape = false; + } else { + wasPrevEscape = false; + index++; } } + // If the line doesn't end with the delimiter or if the line is empty, add the cmd part + if (lastDelimiterIndex != index || line.length() == 0) { + addCmdPart(cmdList, command, line.substring(lastDelimiterIndex, index)); + } return cmdList; } diff --git a/beeline/src/test/org/apache/hive/beeline/TestCommands.java b/beeline/src/test/org/apache/hive/beeline/TestCommands.java index 567ca25270..3b2002f8e9 100644 --- a/beeline/src/test/org/apache/hive/beeline/TestCommands.java +++ b/beeline/src/test/org/apache/hive/beeline/TestCommands.java @@ -18,12 +18,15 @@ package org.apache.hive.beeline; -import org.junit.Test; - import static org.apache.hive.common.util.HiveStringUtils.removeComments; +import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import org.junit.Test; public class TestCommands { @@ -59,5 +62,157 @@ public void testBeelineCommands() throws IOException { BeeLine.mainWithInputRedirection( new String[] {"-u", "jdbc:hive2://", "-e", "create table t1(x int); show tables"}, null); } + + /** + * Helper method for {@link #testGetCmdList()}, to invoke {@link Commands#getCmdList(String, boolean)} once and + * check the results. + * @param expectedString The expected list of commands, in one string, joined by the pipe ('\') character. + * @param line The actual command line passed to {@link Commands#getCmdList(String, boolean)}. + * @param commands The {@link Commands} object on which to call the method. + */ + private void invokeGetCmdList(Commands commands, String line, String expectedString) { + String[] expectedArray = expectedString.split(Pattern.quote("|")); + try { + assertArrayEquals(expectedArray, commands.getCmdList(line, false).toArray()); + } catch (AssertionError error) { + String actual = commands.getCmdList(line, false).stream().collect(Collectors.joining("|")); + System.err.println("invokeGetCmdList failed for: "+line.replace("\n", "\\n")); + System.err.println(" expected: |" + expectedString.replace("\n", "\\n") + "|"); + System.err.println(" actual : |" + actual.replace("\n", "\\n") + "|"); + throw error; + } + } + + /** + * Test {@link Commands#getCmdList(String, boolean)} with various nesting of special characters: + * apostrophe, quotation mark, newline, comment start, semicolon. + * @throws Exception + */ + @Test + public void testGetCmdList() throws Exception { + // ByteArrayOutputStream os = new ByteArrayOutputStream(); + // PrintStream ops = new PrintStream(os); + BeeLine beeline = new BeeLine(); + // beeline.setOutputStream(ops); + // beeline.initializeConsoleReader(null); + Commands commands = new Commands(beeline); + + try { + // COMMANDS, WHITE SPACES + + // trivial + invokeGetCmdList(commands, "", ""); + invokeGetCmdList(commands, ";", ""); + invokeGetCmdList(commands, " ;", " "); + invokeGetCmdList(commands, "; ", "| "); + invokeGetCmdList(commands, " ; ", " | "); + invokeGetCmdList(commands, " \\; ", " ; "); + invokeGetCmdList(commands, "select 1;", "select 1"); + invokeGetCmdList(commands, "select 1", "select 1"); + // add whitespace + invokeGetCmdList(commands, " \n select \n 1 \n ;", " \n select \n 1 \n "); + // add whitespace after semicolon + invokeGetCmdList(commands, " \n select 1 \n ; \n ", " \n select 1 \n | \n "); + // second command + invokeGetCmdList(commands, "select 1;select 2;", "select 1|select 2"); + // second command, no ending semicolon + invokeGetCmdList(commands, "select 1;select 2", "select 1|select 2"); + // three commands with whitespaces + invokeGetCmdList(commands, " \n select \t 1;\tselect\n2\r; select\n3; ", + " \n select \t 1|\tselect\n2\r| select\n3| "); + + // ADD STRINGS + + // trivial string + invokeGetCmdList(commands, "select 'foo';", "select 'foo'"); + invokeGetCmdList(commands, "select \"foo\";", "select \"foo\""); + invokeGetCmdList(commands, "select 'foo'; select 2;", "select 'foo'| select 2"); + invokeGetCmdList(commands, "select \"foo\"; select 2", "select \"foo\"| select 2"); + invokeGetCmdList(commands, "select ''; select \"\"", "select ''| select \"\""); + // string containing delimiter of other string + invokeGetCmdList(commands, "select 'foo\"bar';", "select 'foo\"bar'"); + invokeGetCmdList(commands, "select \"foo'bar\";", "select \"foo'bar\""); + invokeGetCmdList(commands, "select 'foo\"bar'; select 'foo\"bar';", "select 'foo\"bar'| select 'foo\"bar'"); + invokeGetCmdList(commands, "select \"foo'bar\"; select \"foo'bar\"", "select \"foo'bar\"| select \"foo'bar\""); + invokeGetCmdList(commands, "select '\"' ; select \"'\" ;", "select '\"' | select \"'\" "); + // string containing semicolon + invokeGetCmdList(commands, "select 'foo;bar';", "select 'foo;bar'"); + invokeGetCmdList(commands, "select \"foo;bar\";", "select \"foo;bar\""); + // two selects of strings vs. one select containing semicolon + invokeGetCmdList(commands, "select '\"foobar'; select 'foobar\"';", "select '\"foobar'| select 'foobar\"'"); + invokeGetCmdList(commands, "select \"'foobar'; select 'foobar'\";", "select \"'foobar'; select 'foobar'\""); + // newline within strings + invokeGetCmdList(commands, "select 'multi\nline\nstring'; select 'allowed';", + "select 'multi\nline\nstring'| select 'allowed'"); + invokeGetCmdList(commands, "select \"multi\nline\nstring\"; select \"allowed\";", + "select \"multi\nline\nstring\"| select \"allowed\""); + invokeGetCmdList(commands, "select ';\nselect 1;\n'; select 'sql within string';", + "select ';\nselect 1;\n'| select 'sql within string'"); + // escaped quotation marks in strings + invokeGetCmdList(commands, "select 'fo\\'o';", "select 'fo\\'o'"); + invokeGetCmdList(commands, "select \"fo\\\"o\";", "select \"fo\\\"o\""); + invokeGetCmdList(commands, "select 'fo\\\"o';", "select 'fo\\\"o'"); + invokeGetCmdList(commands, "select \"fo\\'o\";", "select \"fo\\'o\""); + // strings ending with backslash + invokeGetCmdList(commands, "select 'foo\\\\'; select \"bar\\\\\";", "select 'foo\\\\'| select \"bar\\\\\""); + + // ADD LINE COMMENTS + + // line comments + invokeGetCmdList(commands, "select 1; -- comment\nselect 2; -- comment\n", + "select 1| -- comment\nselect 2| -- comment\n"); + invokeGetCmdList(commands, "select -- comment\n1; select -- comment\n2;", + "select -- comment\n1| select -- comment\n2"); + invokeGetCmdList(commands, "select -- comment 1; select -- comment 2;", + "select -- comment 1; select -- comment 2;"); + invokeGetCmdList(commands, "select -- comment\\\n1; select -- comment\\\n2;", + "select -- comment\\\n1| select -- comment\\\n2"); + // line comments with semicolons + invokeGetCmdList(commands, "select 1 -- invalid;\nselect 2;", "select 1 -- invalid;\nselect 2"); + invokeGetCmdList(commands, "select 1 -- valid\n;select 2;", "select 1 -- valid\n|select 2"); + // line comments with quotation marks + invokeGetCmdList(commands, "select 1 -- v'lid\n;select 2;select 3;", "select 1 -- v'lid\n|select 2|select 3"); + invokeGetCmdList(commands, "select 1 -- v\"lid\n;select 2;select 3;", "select 1 -- v\"lid\n|select 2|select 3"); + invokeGetCmdList(commands, ";select 1 -- '\n;select \"'\";select 3 -- \"\n;?", + "|select 1 -- '\n|select \"'\"|select 3 -- \"\n|?"); + invokeGetCmdList(commands, ";select 1 -- ';select \"'\"\n;select 3 -- \"\n;?", + "|select 1 -- ';select \"'\"\n|select 3 -- \"\n|?"); + + // ADD BLOCK COMMENTS + + // block comments with semicolons + invokeGetCmdList(commands, "select 1; select /* */ 2; select /* */ 3;", + "select 1| select /* */ 2| select /* */ 3"); + invokeGetCmdList(commands, "select 1; select /* ; */ 2; select /* ; */ 3;", + "select 1| select /* ; */ 2| select /* ; */ 3"); + invokeGetCmdList(commands, "select 1 /* c1; */; /**/ select 2 /*/ c3; /*/; select 3; /* c4 */", + "select 1 /* c1; */| /**/ select 2 /*/ c3; /*/| select 3| /* c4 */"); + // block comments with line comments + invokeGetCmdList(commands, "select 1 --lc /* fake bc\n;select 2 --lc */\n;", + "select 1 --lc /* fake bc\n|select 2 --lc */\n"); + invokeGetCmdList(commands, "select 1 /*bc -- fake lc\n;select 2 --lc */\n;", + "select 1 /*bc -- fake lc\n;select 2 --lc */\n"); + // block comments with quotation marks + invokeGetCmdList(commands, "select 1 /* v'lid */;select 2;select 3;", "select 1 /* v'lid */|select 2|select 3"); + invokeGetCmdList(commands, "select 1 /* v\"lid */;select 2;select 3;", "select 1 /* v\"lid */|select 2|select 3"); + invokeGetCmdList(commands, ";select 1 /* ' */;select \"'\";select 3 /* \" */;?", + "|select 1 /* ' */|select \"'\"|select 3 /* \" */|?"); + invokeGetCmdList(commands, ";select 1 /*/ ' ;select \"'\" /*/;select 3 /* \" */;?", + "|select 1 /*/ ' ;select \"'\" /*/|select 3 /* \" */|?"); + + // UNTERMINATED STRING, COMMENT + + invokeGetCmdList(commands, "select 1; -- ;\\';\\\";--; ;/*;*/; '; ';\";\";", + "select 1| -- ;\\';\\\";--; ;/*;*/; '; ';\";\";"); + invokeGetCmdList(commands, "select 1; /* ;\\';\\\";--;\n;/*; ; '; ';\";\";", + "select 1| /* ;\\';\\\";--;\n;/*; ; '; ';\";\";"); + invokeGetCmdList(commands, "select 1; ' ;\\';\\\";--;\n;/*;*/; ; ;\";\";", + "select 1| ' ;\\';\\\";--;\n;/*;*/; ; ;\";\";"); + invokeGetCmdList(commands, "select 1; \" ;\\';\\\";--;\n;/*;*/; '; '; ; ;", + "select 1| \" ;\\';\\\";--;\n;/*;*/; '; '; ; ;"); + } finally { + beeline.close(); + } + } } -- 2.23.0