From ac4b13c5b650477eedfdbeb8f82494804b41dbd8 Mon Sep 17 00:00:00 2001 From: Zoltan Matyus Date: Thu, 23 Jan 2020 15:51:37 +0100 Subject: [PATCH] HIVE-22767: beeline doesn't parse semicolons in comments properly --- .../org/apache/hive/beeline/Commands.java | 124 +++++--- .../org/apache/hive/beeline/TestCommands.java | 286 +++++++++++++++++- 2 files changed, 358 insertions(+), 52 deletions(-) diff --git a/beeline/src/java/org/apache/hive/beeline/Commands.java b/beeline/src/java/org/apache/hive/beeline/Commands.java index 8f47323700..338b105a2d 100644 --- a/beeline/src/java/org/apache/hive/beeline/Commands.java +++ b/beeline/src/java/org/apache/hive/beeline/Commands.java @@ -52,6 +52,8 @@ import java.util.Properties; import java.util.Set; import java.util.TreeSet; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.hadoop.hive.common.cli.ShellCmdExecutor; import org.apache.hadoop.hive.conf.HiveConf; @@ -1220,71 +1222,93 @@ private boolean execute(String line, boolean call, boolean entireLineAsCommand) return true; } + private enum SectionType { + SINGLE_QUOTED, DOUBLE_QUOTED, LINE_COMMENT, BLOCK_COMMENT + } + /** * Helper method to parse input from Beeline and convert it to a {@link List} of commands that * can be executed. This method contains logic for handling delimiters that are placed within * quotations. It iterates through each character in the line and checks to see if it is the delimiter, ', * or " */ - private List getCmdList(String line, boolean entireLineAsCommand) { - List cmdList = new ArrayList(); + List getCmdList(String line, boolean entireLineAsCommand) { if (entireLineAsCommand) { - cmdList.add(line); - } else { - StringBuilder command = new StringBuilder(); - - // Marker to track if there is starting double quote without an ending double quote - boolean hasUnterminatedDoubleQuote = false; + return Stream.of(line).collect(Collectors.toList()); + } + List cmdList = new ArrayList(); + StringBuilder command = new StringBuilder(); - // Marker to track if there is starting single quote without an ending double quote - boolean hasUnterminatedSingleQuote = false; + // Marker to track if there is a special section open + SectionType sectionType = null; - // Index of the last seen delimiter in the given line - int lastDelimiterIndex = 0; + // Index of the last seen delimiter in the given line + int lastDelimiterIndex = 0; - // Marker to track if the previous character was an escape character - boolean wasPrevEscape = false; + // Marker to track if the previous character was an escape character + boolean wasPrevEscape = false; - int index = 0; + int index = 0; - // Iterate through the line and invoke the addCmdPart method whenever the delimiter is seen that is not inside a - // quoted string - for (; index < line.length();) { - if (line.startsWith("\'", index)) { - // If a single quote is seen and the index is not inside a double quoted string and the previous character - // was not an escape, then update the hasUnterminatedSingleQuote flag - if (!hasUnterminatedDoubleQuote && !wasPrevEscape) { - hasUnterminatedSingleQuote = !hasUnterminatedSingleQuote; - } - wasPrevEscape = false; - index++; - } else if (line.startsWith("\"", index)) { - // If a double quote is seen and the index is not inside a single quoted string and the previous character - // was not an escape, then update the hasUnterminatedDoubleQuote flag - if (!hasUnterminatedSingleQuote && !wasPrevEscape) { - hasUnterminatedDoubleQuote = !hasUnterminatedDoubleQuote; - } - wasPrevEscape = false; - index++; - } else if (line.startsWith(beeLine.getOpts().getDelimiter(), index)) { - // If the delimiter is seen, and the line isn't inside a quoted string, then treat - // line[lastDelimiterIndex] to line[index] as a single command - if (!hasUnterminatedDoubleQuote && !hasUnterminatedSingleQuote) { - addCmdPart(cmdList, command, line.substring(lastDelimiterIndex, index)); - lastDelimiterIndex = index + beeLine.getOpts().getDelimiter().length(); - } - wasPrevEscape = false; - index += beeLine.getOpts().getDelimiter().length(); - } else { - wasPrevEscape = line.startsWith("\\", index) && !wasPrevEscape; - index++; - } - } - // If the line doesn't end with the delimiter or if the line is empty, add the cmd part - if (lastDelimiterIndex != index || line.length() == 0) { + // Iterate through the line and invoke the addCmdPart method whenever the delimiter is seen that is not inside a + // quoted string + for (; index < line.length();) { + if (!wasPrevEscape && sectionType == null && line.startsWith("'", index)) { + // Opening non-escaped single quote + sectionType = SectionType.SINGLE_QUOTED; + index++; + } else if (!wasPrevEscape && sectionType == SectionType.SINGLE_QUOTED && line.startsWith("'", index)) { + // Closing non-escaped single quote + sectionType = null; + index++; + } else if (!wasPrevEscape && sectionType == null && line.startsWith("\"", index)) { + // Opening non-escaped double quote + sectionType = SectionType.DOUBLE_QUOTED; + index++; + } else if (!wasPrevEscape && sectionType == SectionType.DOUBLE_QUOTED && line.startsWith("\"", index)) { + // Closing non-escaped double quote + sectionType = null; + index++; + } else if (sectionType == null && line.startsWith("--", index)) { + // Opening line comment with (non-escapable?) double-dash + sectionType = SectionType.LINE_COMMENT; + wasPrevEscape = false; + index += 2; + } else if (sectionType == SectionType.LINE_COMMENT && line.startsWith("\n", index)) { + // Closing line comment with (non-escapable?) newline + sectionType = null; + wasPrevEscape = false; + index++; + } else if (sectionType == null && line.startsWith("/*", index)) { + // Opening block comment with (non-escapable?) /* + sectionType = SectionType.BLOCK_COMMENT; + wasPrevEscape = false; + index += 2; + } else if (sectionType == SectionType.BLOCK_COMMENT && line.startsWith("*/", index)) { + // Closing line comment with (non-escapable?) newline + sectionType = null; + wasPrevEscape = false; + index += 2; + } else if (line.startsWith("\\", index)) { + // Escape character seen (anywhere) + wasPrevEscape = !wasPrevEscape; + index++; + } else if (sectionType == null && line.startsWith(beeLine.getOpts().getDelimiter(), index)) { + // If the delimiter is seen, and the line isn't inside a section, then treat + // line[lastDelimiterIndex] to line[index] as a single command addCmdPart(cmdList, command, line.substring(lastDelimiterIndex, index)); + index += beeLine.getOpts().getDelimiter().length(); + lastDelimiterIndex = index; + wasPrevEscape = false; + } else { + wasPrevEscape = false; + index++; } } + // If the line doesn't end with the delimiter or if the line is empty, add the cmd part + if (lastDelimiterIndex != index || line.length() == 0) { + addCmdPart(cmdList, command, line.substring(lastDelimiterIndex, index)); + } return cmdList; } diff --git a/beeline/src/test/org/apache/hive/beeline/TestCommands.java b/beeline/src/test/org/apache/hive/beeline/TestCommands.java index 567ca25270..2145b5c83a 100644 --- a/beeline/src/test/org/apache/hive/beeline/TestCommands.java +++ b/beeline/src/test/org/apache/hive/beeline/TestCommands.java @@ -18,12 +18,13 @@ package org.apache.hive.beeline; -import org.junit.Test; - import static org.apache.hive.common.util.HiveStringUtils.removeComments; import static org.junit.Assert.assertEquals; import java.io.IOException; +import java.util.Arrays; + +import org.junit.Test; public class TestCommands { @@ -59,5 +60,286 @@ public void testBeelineCommands() throws IOException { BeeLine.mainWithInputRedirection( new String[] {"-u", "jdbc:hive2://", "-e", "create table t1(x int); show tables"}, null); } + + /** + * Test {@link Commands#getCmdList(String, boolean)} with various nesting of special characters: + * apostrophe, quotation mark, newline, comment start, semicolon. + * @throws Exception + */ + @Test + public void testGetCmdList() throws Exception { + BeeLine beeline = new BeeLine(); + Commands commands = new Commands(beeline); + + try { + // COMMANDS, WHITE SPACES + + // trivial + assertEquals( + Arrays.asList(""), + commands.getCmdList("", false) + ); + assertEquals( + Arrays.asList(""), + commands.getCmdList(";", false) + ); + assertEquals( + Arrays.asList(" "), + commands.getCmdList(" ;", false) + ); + assertEquals( + Arrays.asList("", " "), + commands.getCmdList("; ", false) + ); + assertEquals( + Arrays.asList(" ", " "), + commands.getCmdList(" ; ", false) + ); + assertEquals( + Arrays.asList(" ; "), + commands.getCmdList(" \\; ", false) + ); + assertEquals( + Arrays.asList("select 1"), + commands.getCmdList("select 1;", false) + ); + assertEquals( + Arrays.asList("select 1"), + commands.getCmdList("select 1", false) + ); + // add whitespace + assertEquals( + Arrays.asList(" \n select \n 1 \n "), + commands.getCmdList(" \n select \n 1 \n ;", false) + ); + // add whitespace after semicolon + assertEquals( + Arrays.asList(" \n select 1 \n ", " \n "), + commands.getCmdList(" \n select 1 \n ; \n ", false) + ); + // second command + assertEquals( + Arrays.asList("select 1", "select 2"), + commands.getCmdList("select 1;select 2;", false) + ); + // second command, no ending semicolon + assertEquals( + Arrays.asList("select 1", "select 2"), + commands.getCmdList("select 1;select 2", false) + ); + // three commands with whitespaces + assertEquals( + Arrays.asList(" \n select \t 1", "\tselect\n2\r", " select\n3", " "), + commands.getCmdList(" \n select \t 1;\tselect\n2\r; select\n3; ", false) + ); + + // ADD STRINGS + + // trivial string + assertEquals( + Arrays.asList("select 'foo'"), + commands.getCmdList("select 'foo';", false) + ); + assertEquals( + Arrays.asList("select \"foo\""), + commands.getCmdList("select \"foo\";", false) + ); + assertEquals( + Arrays.asList("select 'foo'", " select 2"), + commands.getCmdList("select 'foo'; select 2;", false) + ); + assertEquals( + Arrays.asList("select \"foo\"", " select 2"), + commands.getCmdList("select \"foo\"; select 2", false) + ); + assertEquals( + Arrays.asList("select ''", " select \"\""), + commands.getCmdList("select ''; select \"\"", false) + ); + // string containing delimiter of other string + assertEquals( + Arrays.asList("select 'foo\"bar'"), + commands.getCmdList("select 'foo\"bar';", false) + ); + assertEquals( + Arrays.asList("select \"foo'bar\""), + commands.getCmdList("select \"foo'bar\";", false) + ); + assertEquals( + Arrays.asList("select 'foo\"bar'", " select 'foo\"bar'"), + commands.getCmdList("select 'foo\"bar'; select 'foo\"bar';", false) + ); + assertEquals( + Arrays.asList("select \"foo'bar\"", " select \"foo'bar\""), + commands.getCmdList("select \"foo'bar\"; select \"foo'bar\"", false) + ); + assertEquals( + Arrays.asList("select '\"' ", " select \"'\" "), + commands.getCmdList("select '\"' ; select \"'\" ;", false) + ); + // string containing semicolon + assertEquals( + Arrays.asList("select 'foo;bar'"), + commands.getCmdList("select 'foo;bar';", false) + ); + assertEquals( + Arrays.asList("select \"foo;bar\""), + commands.getCmdList("select \"foo;bar\";", false) + ); + // two selects of strings vs. one select containing semicolon + assertEquals( + Arrays.asList("select '\"foobar'", " select 'foobar\"'"), + commands.getCmdList("select '\"foobar'; select 'foobar\"';", false) + ); + assertEquals( + Arrays.asList("select \"'foobar'; select 'foobar'\""), + commands.getCmdList("select \"'foobar'; select 'foobar'\";", false) + ); + // newline within strings + assertEquals( + Arrays.asList("select 'multi\nline\nstring'", " select 'allowed'"), + commands.getCmdList("select 'multi\nline\nstring'; select 'allowed';", false) + ); + assertEquals( + Arrays.asList("select \"multi\nline\nstring\"", " select \"allowed\""), + commands.getCmdList("select \"multi\nline\nstring\"; select \"allowed\";", false) + ); + assertEquals( + Arrays.asList("select ';\nselect 1;\n'", " select 'sql within string'"), + commands.getCmdList("select ';\nselect 1;\n'; select 'sql within string';", false) + ); + // escaped quotation marks in strings + assertEquals( + Arrays.asList("select 'fo\\'o'"), + commands.getCmdList("select 'fo\\'o';", false) + ); + assertEquals( + Arrays.asList("select \"fo\\\"o\""), + commands.getCmdList("select \"fo\\\"o\";", false) + ); + assertEquals( + Arrays.asList("select 'fo\\\"o'"), + commands.getCmdList("select 'fo\\\"o';", false) + ); + assertEquals( + Arrays.asList("select \"fo\\'o\""), + commands.getCmdList("select \"fo\\'o\";", false) + ); + // strings ending with backslash + assertEquals( + Arrays.asList("select 'foo\\\\'", " select \"bar\\\\\""), + commands.getCmdList("select 'foo\\\\'; select \"bar\\\\\";", false) + ); + + // ADD LINE COMMENTS + + // line comments + assertEquals( + Arrays.asList("select 1", " -- comment\nselect 2", " -- comment\n"), + commands.getCmdList("select 1; -- comment\nselect 2; -- comment\n", false) + ); + assertEquals( + Arrays.asList("select -- comment\n1", " select -- comment\n2"), + commands.getCmdList("select -- comment\n1; select -- comment\n2;", false) + ); + assertEquals( + Arrays.asList("select -- comment 1; select -- comment 2;"), + commands.getCmdList("select -- comment 1; select -- comment 2;", false) + ); + assertEquals( + Arrays.asList("select -- comment\\\n1", " select -- comment\\\n2"), + commands.getCmdList("select -- comment\\\n1; select -- comment\\\n2;", false) + ); + // line comments with semicolons + assertEquals( + Arrays.asList("select 1 -- invalid;\nselect 2"), + commands.getCmdList("select 1 -- invalid;\nselect 2;", false) + ); + assertEquals( + Arrays.asList("select 1 -- valid\n", "select 2"), + commands.getCmdList("select 1 -- valid\n;select 2;", false) + ); + // line comments with quotation marks + assertEquals( + Arrays.asList("select 1 -- v'lid\n", "select 2", "select 3"), + commands.getCmdList("select 1 -- v'lid\n;select 2;select 3;", false) + ); + assertEquals( + Arrays.asList("select 1 -- v\"lid\n", "select 2", "select 3"), + commands.getCmdList("select 1 -- v\"lid\n;select 2;select 3;", false) + ); + assertEquals( + Arrays.asList("", "select 1 -- '\n", "select \"'\"", "select 3 -- \"\n", "?"), + commands.getCmdList(";select 1 -- '\n;select \"'\";select 3 -- \"\n;?", false) + ); + assertEquals( + Arrays.asList("", "select 1 -- ';select \"'\"\n", "select 3 -- \"\n", "?"), + commands.getCmdList(";select 1 -- ';select \"'\"\n;select 3 -- \"\n;?", false) + ); + + // ADD BLOCK COMMENTS + + // block comments with semicolons + assertEquals( + Arrays.asList("select 1", " select /* */ 2", " select /* */ 3"), + commands.getCmdList("select 1; select /* */ 2; select /* */ 3;", false) + ); + assertEquals( + Arrays.asList("select 1", " select /* ; */ 2", " select /* ; */ 3"), + commands.getCmdList("select 1; select /* ; */ 2; select /* ; */ 3;", false) + ); + assertEquals( + Arrays.asList("select 1 /* c1; */", " /**/ select 2 /*/ c3; /*/", " select 3", " /* c4 */"), + commands.getCmdList("select 1 /* c1; */; /**/ select 2 /*/ c3; /*/; select 3; /* c4 */", false) + ); + // block comments with line comments + assertEquals( + Arrays.asList("select 1 --lc /* fake bc\n", "select 2 --lc */\n"), + commands.getCmdList("select 1 --lc /* fake bc\n;select 2 --lc */\n;", false) + ); + assertEquals( + Arrays.asList("select 1 /*bc -- fake lc\n;select 2 --lc */\n"), + commands.getCmdList("select 1 /*bc -- fake lc\n;select 2 --lc */\n;", false) + ); + // block comments with quotation marks + assertEquals( + Arrays.asList("select 1 /* v'lid */", "select 2", "select 3"), + commands.getCmdList("select 1 /* v'lid */;select 2;select 3;", false) + ); + assertEquals( + Arrays.asList("select 1 /* v\"lid */", "select 2", "select 3"), + commands.getCmdList("select 1 /* v\"lid */;select 2;select 3;", false) + ); + assertEquals( + Arrays.asList("", "select 1 /* ' */", "select \"'\"", "select 3 /* \" */", "?"), + commands.getCmdList(";select 1 /* ' */;select \"'\";select 3 /* \" */;?", false) + ); + assertEquals( + Arrays.asList("", "select 1 /*/ ' ;select \"'\" /*/", "select 3 /* \" */", "?"), + commands.getCmdList(";select 1 /*/ ' ;select \"'\" /*/;select 3 /* \" */;?", false) + ); + + // UNTERMINATED STRING, COMMENT + + assertEquals( + Arrays.asList("select 1", " -- ;\\';\\\";--; ;/*;*/; '; ';\";\";"), + commands.getCmdList("select 1; -- ;\\';\\\";--; ;/*;*/; '; ';\";\";", false) + ); + assertEquals( + Arrays.asList("select 1", " /* ;\\';\\\";--;\n;/*; ; '; ';\";\";"), + commands.getCmdList("select 1; /* ;\\';\\\";--;\n;/*; ; '; ';\";\";", false) + ); + assertEquals( + Arrays.asList("select 1", " ' ;\\';\\\";--;\n;/*;*/; ; ;\";\";"), + commands.getCmdList("select 1; ' ;\\';\\\";--;\n;/*;*/; ; ;\";\";", false) + ); + assertEquals( + Arrays.asList("select 1", " \" ;\\';\\\";--;\n;/*;*/; '; '; ; ;"), + commands.getCmdList("select 1; \" ;\\';\\\";--;\n;/*;*/; '; '; ; ;", false) + ); + } finally { + beeline.close(); + } + } } -- 2.23.0