diff --git beeline/src/java/org/apache/hive/beeline/BeeLine.java beeline/src/java/org/apache/hive/beeline/BeeLine.java index e0fa032..72fd899 100644 --- beeline/src/java/org/apache/hive/beeline/BeeLine.java +++ beeline/src/java/org/apache/hive/beeline/BeeLine.java @@ -161,9 +161,10 @@ private final Map formats = map(new Object[] { "vertical", new VerticalOutputFormat(this), "table", new TableOutputFormat(this), - "csv2", new SeparatedValuesOutputFormat(this, ','), - "tsv2", new SeparatedValuesOutputFormat(this, '\t'), - "dsv", new SeparatedValuesOutputFormat(this, BeeLineOpts.DEFAULT_DELIMITER_FOR_DSV), + "csv2", new SingleCharSeparatedValuesOutputFormat(this, ','), + "tsv2", new SingleCharSeparatedValuesOutputFormat(this, '\t'), + "dsv", new SingleCharSeparatedValuesOutputFormat(this, BeeLineOpts.DEFAULT_DELIMITER_FOR_DSV), + "dsv2", new MultiCharSeparatedValuesOutputFormat(this, BeeLineOpts.DEFAULT_DELIMITER_FOR_DSV), "csv", new DeprecatedSeparatedValuesOutputFormat(this, ','), "tsv", new DeprecatedSeparatedValuesOutputFormat(this, '\t'), "xmlattr", new XMLAttributeOutputFormat(this), @@ -863,7 +864,7 @@ public void updateOptsForCli() { getOpts().updateBeeLineOptsFromConf(); getOpts().setShowHeader(false); getOpts().setOutputFormat("dsv"); - getOpts().setDelimiterForDSV(' '); + getOpts().setDelimiterForDSV(" "); getOpts().setNullEmptyString(true); } diff --git beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java index e6e24b1..c9275f8 100644 --- beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java +++ beeline/src/java/org/apache/hive/beeline/BeeLineOpts.java @@ -103,7 +103,7 @@ private String scriptFile = null; private String[] initFiles = null; private String authType = null; - private char delimiterForDSV = DEFAULT_DELIMITER_FOR_DSV; + private String delimiterForDSV = String.valueOf(DEFAULT_DELIMITER_FOR_DSV); private Map hiveVariables = new HashMap(); private Map hiveConfVariables = new 
HashMap(); @@ -616,11 +616,11 @@ public void setTruncateTable(boolean truncateTable) { this.truncateTable = truncateTable; } - public char getDelimiterForDSV() { + public String getDelimiterForDSV() { return delimiterForDSV; } - public void setDelimiterForDSV(char delimiterForDSV) { + public void setDelimiterForDSV(String delimiterForDSV) { this.delimiterForDSV = delimiterForDSV; } diff --git beeline/src/java/org/apache/hive/beeline/MultiCharSeparatedValuesOutputFormat.java beeline/src/java/org/apache/hive/beeline/MultiCharSeparatedValuesOutputFormat.java new file mode 100644 index 0000000..63291a8 --- /dev/null +++ beeline/src/java/org/apache/hive/beeline/MultiCharSeparatedValuesOutputFormat.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.beeline; + +/** + * OutputFormat for values separated by a delimiter. The delimiter can consist of a single or + * multiple characters. 
+ */ +class MultiCharSeparatedValuesOutputFormat extends SeparatedValuesOutputFormat { + private static final String OUTPUT_FORMAT_DSV2 = "dsv2"; + private String separator; + + MultiCharSeparatedValuesOutputFormat(BeeLine beeLine, String separator) { + super(beeLine); + this.separator = separator; + } + + MultiCharSeparatedValuesOutputFormat(BeeLine beeLine, char separator) { + super(beeLine); + this.separator = String.valueOf(separator); + } + + @Override + protected void updateSeparator() { + if (getBeeLine().getOpts().getOutputFormat().equals(OUTPUT_FORMAT_DSV2)) { + separator = getBeeLine().getOpts().getDelimiterForDSV(); + } + } + + /** + * {@inheritDoc} + */ + @Override + protected String getFormattedStr(String[] values) { + StringBuilder formattedValues = new StringBuilder(); + for (String value : values) { + formattedValues.append(formattedValues.length() == 0 ? "" : separator); + if (value == null) { + formattedValues.append(""); + continue; + } + + formattedValues.append(getEscapedValue(value)); + } + return formattedValues.toString(); + } + + /** + * Escapes the delimiter and quote characters in the values. + */ + private String getEscapedValue(String value) { + + if (isQuotingDisabled()) { + // If the quoting is disabled, no escaping happens. + return value; + } + + StringBuilder escapedValue = new StringBuilder(); + boolean containsQuote = false; + for (char character : value.toCharArray()) { + if (character == QUOTE_CHARACTER) { + // If the value contains the quote character, + // an additional quote character will be added in front of it. + escapedValue.append(QUOTE_CHARACTER); + containsQuote = true; + } + escapedValue.append(character); + } + + // The value has to be surrounded by quotes if it contains + // the separator or the quote character.
+ if (containsQuote || value.contains(separator)) { + escapedValue.insert(0, QUOTE_CHARACTER).append(QUOTE_CHARACTER); + } + + return escapedValue.toString(); + } +} \ No newline at end of file diff --git beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java index 66d9fd0..29a60d9 100644 --- beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java +++ beeline/src/java/org/apache/hive/beeline/SeparatedValuesOutputFormat.java @@ -22,49 +22,43 @@ */ package org.apache.hive.beeline; -import java.io.IOException; -import java.io.StringWriter; - -import org.apache.hadoop.io.IOUtils; -import org.supercsv.io.CsvListWriter; -import org.supercsv.prefs.CsvPreference; - /** - * OutputFormat for values separated by a delimiter. + * Common class for the OutputFormats for delimiter separated values. */ -class SeparatedValuesOutputFormat implements OutputFormat { +abstract class SeparatedValuesOutputFormat implements OutputFormat { public final static String DISABLE_QUOTING_FOR_SV = "disable.quoting.for.sv"; + protected final static char QUOTE_CHARACTER = '"'; private final BeeLine beeLine; - private CsvPreference quotedCsvPreference; - private CsvPreference unquotedCsvPreference; - SeparatedValuesOutputFormat(BeeLine beeLine, char separator) { + SeparatedValuesOutputFormat(BeeLine beeLine) { this.beeLine = beeLine; - unquotedCsvPreference = new CsvPreference.Builder('\0', separator, "").build(); - quotedCsvPreference = new CsvPreference.Builder('"', separator, "").build(); } - private void updateCsvPreference() { - if (beeLine.getOpts().getOutputFormat().equals("dsv")) { - // check whether delimiter changed by user - char curDel = (char) getCsvPreference().getDelimiterChar(); - char newDel = beeLine.getOpts().getDelimiterForDSV(); - // if delimiter changed, rebuild the csv preference - if (newDel != curDel) { - // "" is passed as the end of line symbol in following 
function, as - // beeline itself adds newline - if (isQuotingDisabled()) { - unquotedCsvPreference = new CsvPreference.Builder('\0', newDel, "").build(); - } else { - quotedCsvPreference = new CsvPreference.Builder('"', newDel, "").build(); - } - } - } - } + /** + * Gets the separator defined by the 'delimiterForDSV' option + * and updates the current separator if they differ. + */ + protected abstract void updateSeparator(); + + /** + * Returns the values separated by the delimiter. + * In case of single character delimiter, the Super CSV library is used to create the result. + * In case of multi-character delimiter, the Super CSV library cannot be used, + * since the library doesn't support strings as separator. + * + * If the quoting is not disabled the delimiter string and quote character will be escaped in the + * values. For example if the delimiter is "&" and the values are: aabb, cc&dd, ee"ff + * the result will be: aabb&"cc&dd"&"ee""ff" + * + * The quoting can be disabled by setting the 'disable.quoting.for.sv' option to true. + * In this case, no escaping happens. 
+ * The result of the previous example would be: aabb&cc&dd&ee"ff + */ + protected abstract String getFormattedStr(String[] vals); @Override public int print(Rows rows) { - updateCsvPreference(); + updateSeparator(); int count = 0; while (rows.hasNext()) { @@ -79,28 +73,13 @@ public int print(Rows rows) { return count - 1; // sans header row } - private String getFormattedStr(String[] vals) { - StringWriter strWriter = new StringWriter(); - CsvListWriter writer = new CsvListWriter(strWriter, getCsvPreference()); - if (vals.length > 0) { - try { - writer.write(vals); - } catch (IOException e) { - beeLine.error(e); - } finally { - IOUtils.closeStream(writer); - } - } - return strWriter.toString(); - } - private void printRow(Rows.Row row) { String[] vals = row.values; String formattedStr = getFormattedStr(vals); beeLine.output(formattedStr); } - private boolean isQuotingDisabled() { + protected boolean isQuotingDisabled() { String quotingDisabledStr = System.getProperty(SeparatedValuesOutputFormat.DISABLE_QUOTING_FOR_SV); if (quotingDisabledStr == null || quotingDisabledStr.isEmpty()) { // default is disabling the double quoting for separated value @@ -116,11 +95,7 @@ private boolean isQuotingDisabled() { } } - private CsvPreference getCsvPreference() { - if (isQuotingDisabled()) { - return unquotedCsvPreference; - } else { - return quotedCsvPreference; - } + protected BeeLine getBeeLine() { + return beeLine; } } diff --git beeline/src/java/org/apache/hive/beeline/SingleCharSeparatedValuesOutputFormat.java beeline/src/java/org/apache/hive/beeline/SingleCharSeparatedValuesOutputFormat.java new file mode 100644 index 0000000..0c9de8f --- /dev/null +++ beeline/src/java/org/apache/hive/beeline/SingleCharSeparatedValuesOutputFormat.java @@ -0,0 +1,92 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This source file is based on code taken from SQLLine 1.0.2 + * See SQLLine notice in LICENSE + */ +package org.apache.hive.beeline; + +import java.io.IOException; +import java.io.StringWriter; + +import org.apache.hadoop.io.IOUtils; +import org.supercsv.io.CsvListWriter; +import org.supercsv.prefs.CsvPreference; + +/** + * OutputFormat for values separated by a single character delimiter. 
+ */ +class SingleCharSeparatedValuesOutputFormat extends SeparatedValuesOutputFormat { + private static final String OUTPUT_FORMAT_DSV = "dsv"; + private static final char NULL_CHARACTER = '\0'; + private CsvPreference quotedCsvPreference; + private CsvPreference unquotedCsvPreference; + + SingleCharSeparatedValuesOutputFormat(BeeLine beeLine, char separator) { + super(beeLine); + unquotedCsvPreference = new CsvPreference.Builder(NULL_CHARACTER, separator, "").build(); + quotedCsvPreference = new CsvPreference.Builder(QUOTE_CHARACTER, separator, "").build(); + } + + @Override + protected void updateSeparator() { + if (getBeeLine().getOpts().getOutputFormat().equals(OUTPUT_FORMAT_DSV)) { + // check whether delimiter changed by user + char curDel = (char) getCsvPreference().getDelimiterChar(); + char newDel = getBeeLine().getOpts().getDelimiterForDSV().charAt(0); + // if delimiter changed, rebuild the csv preference + if (newDel != curDel) { + // "" is passed as the end of line symbol in following function, as + // beeline itself adds newline + if (isQuotingDisabled()) { + unquotedCsvPreference = new CsvPreference.Builder(NULL_CHARACTER, newDel, "").build(); + } else { + quotedCsvPreference = new CsvPreference.Builder(QUOTE_CHARACTER, newDel, "").build(); + } + } + } + } + + /** + * {@inheritDoc} + */ + @Override + protected String getFormattedStr(String[] vals) { + StringWriter strWriter = new StringWriter(); + CsvListWriter writer = new CsvListWriter(strWriter, getCsvPreference()); + if (vals.length > 0) { + try { + writer.write(vals); + } catch (IOException e) { + getBeeLine().error(e); + } finally { + IOUtils.closeStream(writer); + } + } + return strWriter.toString(); + } + + private CsvPreference getCsvPreference() { + if (isQuotingDisabled()) { + return unquotedCsvPreference; + } else { + return quotedCsvPreference; + } + } +} \ No newline at end of file diff --git itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java 
itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java index 892c733..1cb4018 100644 --- itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java +++ itests/hive-unit/src/test/java/org/apache/hive/beeline/TestBeeLineWithArgs.java @@ -424,6 +424,20 @@ public void testDSVOutput() throws Throwable { } /** + * Test writing output using DSV format, with multiple character delimiter "&&" + */ + @Test + public void testDSVOutputStringDelimiter() throws Throwable { + String SCRIPT_TEXT = getFormatTestQueryForStringDelimiter(); + List argList = getBaseArgs(miniHS2.getBaseJdbcURL()); + argList.add("--outputformat=dsv2"); + argList.add("--delimiterForDSV=&&"); + + final String EXPECTED_PATTERN = "1&&NULL&&defg&&ab\"c&&1.0&&aa&b&&aa&&b"; + testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList); + } + + /** * Test writing output using TSV (new) format */ @Test @@ -526,6 +540,22 @@ public void testDSVOutputWithDoubleQuotes() throws Throwable { } /** + * Test writing output using DSV format, with multiple character delimiter "&&" + */ + @Test + public void testDSVOutputWithStringDelimiterWithDoubleQuotes() throws Throwable { + String SCRIPT_TEXT = getFormatTestQueryForStringDelimiterAndEnableQuotes(); + List argList = getBaseArgs(miniHS2.getBaseJdbcURL()); + argList.add("--outputformat=dsv2"); + argList.add("--delimiterForDSV=&&"); + System.setProperty(SeparatedValuesOutputFormat.DISABLE_QUOTING_FOR_SV, "false"); + + final String EXPECTED_PATTERN = "1&&NULL&&defg&&\"ab\"\"c\"&&\"\"\"aa\"\"\"&&1.0&&aa&b&&\"aa&&b\""; + testScriptFile(SCRIPT_TEXT, EXPECTED_PATTERN, true, argList); + System.setProperty(SeparatedValuesOutputFormat.DISABLE_QUOTING_FOR_SV, "true"); + } + + /** * Test writing output using TSV deprecated format * Check for deprecation message */ @@ -576,6 +606,16 @@ private String getFormatTestQueryForEableQuotes() { "select 1, null, 'defg', 'ab\"c', '\"aa\"', 1.0D from " + tableName + " limit 1 ;\n"; } + private 
String getFormatTestQueryForStringDelimiter() { + return "set hive.support.concurrency = false;\n" + + "select 1, null, 'defg', 'ab\"c', 1.0D, 'aa&b', 'aa&&b' from " + tableName + " limit 1 ;\n"; + } + + private String getFormatTestQueryForStringDelimiterAndEnableQuotes() { + return "set hive.support.concurrency = false;\n" + + "select 1, null, 'defg', 'ab\"c', '\"aa\"', 1.0D, 'aa&b', 'aa&&b' from " + tableName + " limit 1 ;\n"; + } + /** * Select null from table , check if setting null to empty string works - Using beeling cmd line * argument.