diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLine.java b/beeline/src/java/org/apache/hive/beeline/BeeLine.java index 4caf28c..b2db66b 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLine.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLine.java @@ -1003,6 +1003,7 @@ private String obtainPasswordFromFile(String passwordFilePath) { public void updateOptsForCli() { getOpts().updateBeeLineOptsFromConf(); getOpts().setShowHeader(false); + getOpts().setEscapeCRLF(false); getOpts().setOutputFormat("dsv"); getOpts().setDelimiterForDSV(' '); getOpts().setNullEmptyString(true); diff --git a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java index e57b2d7..85052d9 100644 --- a/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java +++ b/beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java @@ -70,6 +70,7 @@ private boolean silent = false; private boolean color = false; private boolean showHeader = true; + private boolean escapeCRLF = false; private boolean showDbInPrompt = false; private int headerInterval = 100; private boolean fastConnect = true; @@ -495,6 +496,21 @@ public boolean getShowHeader() { } } + public void setEscapeCRLF(boolean escapeCRLF) { + this.escapeCRLF = escapeCRLF; + } + + public boolean getEscapeCRLF() { + if (beeLine.isBeeLine()) { + return escapeCRLF; + } else { + boolean flag; + HiveConf conf = beeLine.getCommands().getHiveConf(true); + flag = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CLI_PRINT_ESCAPE_CRLF); + return flag; + } + } + public void setShowDbInPrompt(boolean showDbInPrompt) { this.showDbInPrompt = showDbInPrompt; } diff --git a/beeline/src/java/org/apache/hive/beeline/Rows.java b/beeline/src/java/org/apache/hive/beeline/Rows.java index 448f123..e3b983e 100644 --- a/beeline/src/java/org/apache/hive/beeline/Rows.java +++ b/beeline/src/java/org/apache/hive/beeline/Rows.java @@ -30,6 +30,8 @@ import java.util.Arrays; import 
java.util.Iterator; +import org.apache.hadoop.hive.common.cli.EscapeCRLFHelper; + /** * Abstract base class representing a set of rows to be displayed. * Holds column values as strings @@ -168,6 +170,10 @@ public String toString(){ value = o.toString(); } + if (beeLine.getOpts().getEscapeCRLF()) { + value = EscapeCRLFHelper.escapeCRLF(value); + } + values[i] = value.intern(); sizes[i] = value.length(); } diff --git a/beeline/src/main/resources/BeeLine.properties b/beeline/src/main/resources/BeeLine.properties index 707188e..6fca953 100644 --- a/beeline/src/main/resources/BeeLine.properties +++ b/beeline/src/main/resources/BeeLine.properties @@ -173,6 +173,7 @@ cmd-usage: Usage: java org.apache.hive.cli.beeline.BeeLine \n \ \ --property-file= the file to read connection properties (url, driver, user, password) from\n \ \ --color=[true/false] control whether color is used for display\n \ \ --showHeader=[true/false] show column names in query results\n \ +\ --escapeCRLF=[true/false] show carriage return and line feeds in query results as escaped \\r and \\n \n \ \ --headerInterval=ROWS; the interval between which heades are displayed\n \ \ --fastConnect=[true/false] skip building table/column list for tab-completion\n \ \ --autoCommit=[true/false] enable/disable automatic transaction commit\n \ diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java index 8f6b52c..a78e0c6 100644 --- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java +++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hive.common.HiveInterruptUtils; import org.apache.hadoop.hive.common.LogUtils; import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; +import org.apache.hadoop.hive.common.cli.EscapeCRLFHelper; import org.apache.hadoop.hive.common.cli.ShellCmdExecutor; import org.apache.hadoop.hive.common.io.CachingPrintStream; import 
org.apache.hadoop.hive.common.io.FetchConverter; @@ -222,6 +223,7 @@ private String getFirstCmd(String cmd, int length) { int processLocalCmd(String cmd, CommandProcessor proc, CliSessionState ss) { int tryCount = 0; boolean needRetry; + boolean escapeCRLF = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CLI_PRINT_ESCAPE_CRLF); int ret = 0; do { @@ -259,6 +261,9 @@ int processLocalCmd(String cmd, CommandProcessor proc, CliSessionState ss) { } while (qp.getResults(res)) { for (String r : res) { + if (escapeCRLF) { + r = EscapeCRLFHelper.escapeCRLF(r); + } out.println(r); } counter += res.size(); diff --git a/common/src/java/org/apache/hadoop/hive/common/cli/EscapeCRLFHelper.java b/common/src/java/org/apache/hadoop/hive/common/cli/EscapeCRLFHelper.java new file mode 100644 index 0000000..11afd0d --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/cli/EscapeCRLFHelper.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.common.cli; + +public class EscapeCRLFHelper { + + private static final char CARRIAGE_RETURN = '\r'; + private static final char LINE_FEED = '\n'; + + public EscapeCRLFHelper() { + } + + /** + * Replace any carriage return or line feed characters in line with the escaped + * 2-character sequences \r or \n. + * + * @param line the string for the CRLF substitution; must not be null. + * @return If there were no replacements, then just return line. Otherwise, a new String with + * escaped CRLF. + */ + public static String escapeCRLF(String line) { + + StringBuilder sb = null; + int lastNonCRLFIndex = 0; + int index = 0; + final int length = line.length(); + while (index < length) { + char ch = line.charAt(index); + if (ch == CARRIAGE_RETURN || ch == LINE_FEED) { + if (sb == null) { + + // We defer allocation until we really need it since in the common case there is + // no CRLF substitution. + sb = new StringBuilder(); + } + if (lastNonCRLFIndex < index) { + + // Copy any intervening non-CRLF characters up to but not including current 'index'. + sb.append(line.substring(lastNonCRLFIndex, index)); + } + lastNonCRLFIndex = ++index; + if (ch == CARRIAGE_RETURN) { + sb.append("\\r"); + } else { + sb.append("\\n"); + } + } else { + index++; + } + } + if (sb == null) { + + // No CRLF substitution -- return original line. + return line; + } else { + if (lastNonCRLFIndex < index) { + + // Copy any intervening non-CRLF characters up to but not including current 'index'.
+ sb.append(line.substring(lastNonCRLFIndex, index)); + } + return sb.toString(); + } + } +} diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 05c2acd..858f22b 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2237,6 +2237,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal HIVE_CLI_PRINT_HEADER("hive.cli.print.header", false, "Whether to print the names of the columns in query output."), + HIVE_CLI_PRINT_ESCAPE_CRLF("hive.cli.print.escape.crlf", false, + "Whether to print carriage returns and line feeds in row output as escaped \\r and \\n"), + HIVE_CLI_TEZ_SESSION_ASYNC("hive.cli.tez.session.async", true, "Whether to start Tez\n" + "session in background when running CLI with Tez, allowing CLI to be available earlier."), diff --git a/data/files/escape_crlf.parquet b/data/files/escape_crlf.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3f2cd97e58d52ee5bf166a27befdffd07eea6a01 GIT binary patch literal 610 zcmaKq%SyvQ6o#iYlu*)@PMAOzVW6o(ERDvbt=+hDS;UoAA(u%T$fZuwij=;M&mpc| zxblHK>BSa=E+(9l^M60{&&=@V&cp~4tQxomg@{F!2uBDZB2?fj!$Oi#mQ`5g08}xa z?#ZQ;y~a_>>J!Epse4&5X{_oMZ2w3A?MqIe_g&5-cg!@-iWyHEC0d97I~Z4$!L4x5 z@&XG8SDT2`;<+2o7{U@Xb${aWyx>tfZo)QeimDj8r~=g#k+Rj|s$7#FW zDwfvjJ+l?@Rgu3ddN?45qsi}Jj}(<0PhI{pV@2DY^Q@iw6PCE-qq3mI+mz8h<;>4` zKn0+Y2nywtVNpcI?ph Z%q!3H9n0?oo!+H$d4@jW%?#mn{Q_-Snc)Bc literal 0 HcmV?d00001 diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java index 55c6c23..f51cbfa 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java @@ -45,6 +45,7 @@ import com.google.common.base.Function; import 
com.google.common.collect.Lists; + import org.apache.commons.lang.exception.ExceptionUtils; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -69,6 +70,7 @@ // Default location of HiveServer2 private static final String tableName = "TestBeelineTable1"; private static final String tableComment = "Test table comment"; + private static final String escapeCRLFTableName = "TestBeelineEscapeCRLFTable"; private static MiniHS2 miniHS2; private static final String userName = System.getProperty("user.name"); @@ -121,6 +123,7 @@ private static void createTable() throws ClassNotFoundException, SQLException { HiveConf conf = new HiveConf(); String dataFileDir = conf.get("test.data.files").replace('\\', '/') .replace("c:", ""); + Path dataFilePath = new Path(dataFileDir, "kv1.txt"); // drop table. ignore error. try { @@ -201,6 +204,26 @@ private void testScriptFile(String scriptText, List argList, String expe } /** + * Attempt to execute a simple script file with the -f and -i option to + * BeeLine to test for presence of an expected pattern in the output (stdout + * or stderr), fail if not found. Print PASSED or FAILED + * + * @param expectedRegex + * Text to look for in command output (stdout) + * @param regExFlags + * flags for Pattern.matcher + * @throws Exception + * on command execution error + */ + private void testScriptFile(String scriptText, List argList, String expectedRegex, + int regExFlags) throws Throwable { + testScriptFile(scriptText, argList, OutStream.OUT, + Collections.singletonList(new Tuple<>(expectedRegex, true)), + regExFlags + ); + } + + /** * Attempt to execute a simple script file with the -f and -i option * to BeeLine to test for presence of an expected pattern * in the output (stdout or stderr), fail if not found. 
@@ -219,11 +242,23 @@ private void testScriptFile(String scriptText, List argList, OutStream o } private void testScriptFile(String scriptText, List argList, OutStream streamType, + List> expectedMatches, int regExFlags) throws Throwable { + testScriptFile(scriptText, argList, streamType, expectedMatches, + Arrays.asList(Modes.values()), regExFlags); + } + + private void testScriptFile(String scriptText, List argList, OutStream streamType, List> expectedMatches) throws Throwable { testScriptFile(scriptText, argList, streamType, expectedMatches, Arrays.asList(Modes.values())); } + private void testScriptFile(String scriptText, List argList, + OutStream streamType, List> expectedMatches, List modes) + throws Throwable { + testScriptFile(scriptText, argList, streamType, expectedMatches, modes, Pattern.DOTALL); + } + /** * Attempt to execute a simple script file with the -f or -i option * to BeeLine (or both) to test for presence of an expected pattern @@ -237,7 +272,7 @@ private void testScriptFile(String scriptText, List argList, OutStream s * @throws Exception on command execution error */ private void testScriptFile(String scriptText, List argList, - OutStream streamType, List> expectedMatches, List modes) + OutStream streamType, List> expectedMatches, List modes, int regExFlags) throws Throwable { // Put the script content in a temp file File scriptFile = File.createTempFile(this.getClass().getSimpleName(), "temp"); @@ -247,32 +282,68 @@ private void testScriptFile(String scriptText, List argList, os.print(scriptText); os.close(); - List> patternsToBeMatched = Lists.transform(expectedMatches, - new Function, Tuple>() { - @Override - public Tuple apply(Tuple tuple) { - return new Tuple<>( - Pattern.compile(".*" + tuple.pattern + ".*", Pattern.DOTALL), - tuple.shouldMatch - ); + if (regExFlags == 0) { + + // No patterns -- just match on equality. 
+ for (Modes mode : modes) { + String output = mode.output(scriptFile, argList, streamType); + for (Tuple expectedMatch : expectedMatches) { + boolean matches = output.equals(expectedMatch.pattern);; + if (expectedMatch.shouldMatch != matches) { + //failed + byte[] bytes = output.getBytes(); + fail("Output (length " + output.length() + ")\n\"" + output + "\" " + + "bytes \"" + displayBytes(bytes, 0, bytes.length) + "\" " + + "should" + (expectedMatch.shouldMatch ? "" : " not") + + " equals \n" + expectedMatch.pattern); + } + } + } + } else { + + List> patternsToBeMatched = Lists.transform(expectedMatches, + new Function, Tuple>() { + @Override + public Tuple apply(Tuple tuple) { + return new Tuple<>( + Pattern.compile(".*" + tuple.pattern + ".*", regExFlags), + tuple.shouldMatch + ); + } + }); + + for (Modes mode : modes) { + String output = mode.output(scriptFile, argList, streamType); + for (Tuple patternToMatch : patternsToBeMatched) { + Matcher m = patternToMatch.pattern.matcher(output); + boolean matches = m.matches(); + if (patternToMatch.shouldMatch != matches) { + //failed + byte[] bytes = output.getBytes(); + fail("Output (length " + output.length() + ")\n\"" + output + "\" " + + "bytes \"" + displayBytes(bytes, 0, bytes.length) + "\" " + + "should" + (patternToMatch.shouldMatch ? "" : " not") + + " contain\n" + patternToMatch.pattern.pattern()); } - }); - - for (Modes mode : modes) { - String output = mode.output(scriptFile, argList, streamType); - for (Tuple patternToMatch : patternsToBeMatched) { - Matcher m = patternToMatch.pattern.matcher(output); - boolean matches = m.matches(); - if (patternToMatch.shouldMatch != matches) { - //failed - fail("Output" + output + " should" + (patternToMatch.shouldMatch ? 
"" : " not") + - " contain " + patternToMatch.pattern.pattern()); } } } scriptFile.delete(); } + public static String displayBytes(byte[] bytes, int start, int length) { + StringBuilder sb = new StringBuilder(); + for (int i = start; i < start + length; i++) { + char ch = (char) bytes[i]; + if (ch < ' ' || ch > '~') { + sb.append(String.format("\\%03d", bytes[i] & 0xff)); + } else { + sb.append(ch); + } + } + return sb.toString(); + } + /* We are testing for both type of modes always so not passing that as a parameter for now */ @@ -672,6 +743,94 @@ private String getFormatTestQueryForEableQuotes() { } /** + * Test writing output using Escape CRLF (false), DSV format, with custom delimiter ";" + */ + @Test + public void testEscapeCRLFOffInDSVOutput() throws Throwable { + String SCRIPT_TEXT = getFormatEscapeCRLFTestQuery(); + List argList = getBaseArgs(miniHS2.getBaseJdbcURL()); + argList.add("--outputformat=dsv"); + argList.add("--delimiterForDSV=;"); + argList.add("--showHeader=false"); + // Don't specify "--escapeCRLF" + + // Use MULTILINE regex's beginning of line ^ anchor. + // NOTE: trailing blanks get removed from the output... + final String EXPECTED_PATTERN = + "^no cr lf;a cr\nand a lf\n" + + "^word word end CRLF\n" + + "^\n"; + + testScriptFile(SCRIPT_TEXT, argList, EXPECTED_PATTERN, Pattern.MULTILINE); + } + + /** + * Test writing output using Escape CRLF, DSV format, with custom delimiter ";" + */ + @Test + public void testEscapeCRLFInDSVOutput() throws Throwable { + String SCRIPT_TEXT = getFormatEscapeCRLFTestQuery(); + List argList = getBaseArgs(miniHS2.getBaseJdbcURL()); + argList.add("--outputformat=dsv"); + argList.add("--delimiterForDSV=;"); + argList.add("--showHeader=false"); + argList.add("--escapeCRLF"); + + // Just compare for equals and avoid Pattern regex issues. 
+ final String EQUALS_STRING = + "no cr lf;a cr\\nand a lf\\nword word end CRLF\\n\n"; + testScriptFile(SCRIPT_TEXT, argList, EQUALS_STRING, /* equals */ 0); + } + + /** + * Test writing output using Escape CRLF, TSV (new) format + */ + @Test + public void testEscapeCRLFInTSV2Output() throws Throwable { + String SCRIPT_TEXT = getFormatEscapeCRLFTestQuery(); + List argList = getBaseArgs(miniHS2.getBaseJdbcURL()); + argList.add("--outputformat=tsv2"); + argList.add("--showHeader=false"); + argList.add("--escapeCRLF"); + + // Just compare for equals and avoid Pattern regex issues. + final String EQUALS_STRING = + "no cr lf\ta cr\\nand a lf\\nword word end CRLF\\n\n"; + testScriptFile(SCRIPT_TEXT, argList, EQUALS_STRING, /* equals */ 0); + } + + /** + * Test writing output using Escape CRLF, CSV deprecated format + */ + @Test + public void testEscapeCRLFInCSVOutput() throws Throwable { + String SCRIPT_TEXT = getFormatEscapeCRLFTestQuery(); + List argList = getBaseArgs(miniHS2.getBaseJdbcURL()); + argList.add("--outputformat=csv"); + argList.add("--showHeader=false"); + argList.add("--escapeCRLF"); + + // Just compare for equals and avoid Pattern regex issues. + final String EQUALS_STRING = + "'no cr lf','a cr\\nand a lf\\nword word end CRLF\\n'\n"; + testScriptFile(SCRIPT_TEXT, argList, EQUALS_STRING, /* equals */ 0); + } + + private String getFormatEscapeCRLFTestQuery() { + + // Drop/create table for escape CRLF testing, populate, and query. 
+ String queryString = + "set hive.support.concurrency = false;\n" + + "drop table if exists " + escapeCRLFTableName + ";\n" + + "create table " + escapeCRLFTableName + + " (no_crlf string, has_crlf string) stored as sequencefile;\n" + + "insert into table " + escapeCRLFTableName + + " values(\"no cr lf\", \"a cr \r and a lf \n word word end CRLF \r\n\");\n" + + "select * from " + escapeCRLFTableName + " limit 1 ;\n"; + return queryString; + } + + /** * Select null from table , check if setting null to empty string works - Using beeling cmd line * argument. * Original beeline/sqlline used to print nulls as empty strings diff --git a/itests/util/src/main/java/org/apache/hive/beeline/QFileBeeLineClient.java b/itests/util/src/main/java/org/apache/hive/beeline/QFileBeeLineClient.java index 72ec2d6..e6bf54b 100644 --- a/itests/util/src/main/java/org/apache/hive/beeline/QFileBeeLineClient.java +++ b/itests/util/src/main/java/org/apache/hive/beeline/QFileBeeLineClient.java @@ -44,6 +44,7 @@ "!set verbose false", "!set silent true", "!set showheader false", + "!set escapeCRLF false", "USE default;", "SHOW TABLES;", }; @@ -56,6 +57,7 @@ "!set verbose true", "!set silent false", "!set showheader true", + "!set escapeCRLF false", "!set outputformat table", "USE default;" }; diff --git a/ql/src/test/queries/clientpositive/cli_print_escape_crlf.q b/ql/src/test/queries/clientpositive/cli_print_escape_crlf.q new file mode 100644 index 0000000..a254b6b --- /dev/null +++ b/ql/src/test/queries/clientpositive/cli_print_escape_crlf.q @@ -0,0 +1,16 @@ +create table repro (lvalue int, charstring string) stored as parquet; + +LOAD DATA LOCAL INPATH '../../data/files/escape_crlf.parquet' overwrite into table repro; + +set hive.fetch.task.conversion=more; + + +select count(*) from repro; + +set hive.cli.print.escape.crlf=false; +select * from repro; + + +set hive.cli.print.escape.crlf=true; +select * from repro; + diff --git a/ql/src/test/results/clientpositive/cli_print_escape_crlf.q.out 
b/ql/src/test/results/clientpositive/cli_print_escape_crlf.q.out new file mode 100644 index 0000000..10e2b0a --- /dev/null +++ b/ql/src/test/results/clientpositive/cli_print_escape_crlf.q.out @@ -0,0 +1,50 @@ +PREHOOK: query: create table repro (lvalue int, charstring string) stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repro +POSTHOOK: query: create table repro (lvalue int, charstring string) stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repro +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/escape_crlf.parquet' overwrite into table repro +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@repro +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/escape_crlf.parquet' overwrite into table repro +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@repro +PREHOOK: query: select count(*) from repro +PREHOOK: type: QUERY +PREHOOK: Input: default@repro +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from repro +POSTHOOK: type: QUERY +POSTHOOK: Input: default@repro +#### A masked pattern was here #### +3 +PREHOOK: query: select * from repro +PREHOOK: type: QUERY +PREHOOK: Input: default@repro +#### A masked pattern was here #### +POSTHOOK: query: select * from repro +POSTHOOK: type: QUERY +POSTHOOK: Input: default@repro +#### A masked pattern was here #### +1 newline +here +2 carriage return +here +3 both +here +PREHOOK: query: select * from repro +PREHOOK: type: QUERY +PREHOOK: Input: default@repro +#### A masked pattern was here #### +POSTHOOK: query: select * from repro +POSTHOOK: type: QUERY +POSTHOOK: Input: default@repro +#### A masked pattern was here #### +1 newline\nhere +2 carriage return\rhere +3 both\r\nhere