diff --git beeline/src/java/org/apache/hive/beeline/BeeLine.java beeline/src/java/org/apache/hive/beeline/BeeLine.java
index 57cb1c0..ae542bf 100644
--- beeline/src/java/org/apache/hive/beeline/BeeLine.java
+++ beeline/src/java/org/apache/hive/beeline/BeeLine.java
@@ -80,11 +80,14 @@
 import jline.History;
 import jline.SimpleCompletor;
+import jline.Terminal;
 
 import org.apache.commons.cli.CommandLine;
 import org.apache.commons.cli.GnuParser;
 import org.apache.commons.cli.OptionBuilder;
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
+import org.apache.commons.io.input.BOMInputStream;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.io.IOUtils;
@@ -733,7 +736,7 @@ public int begin(String[] args, InputStream inputStream) throws IOException {
     } catch (Exception e) {
       // ignore
     }
-    ConsoleReader reader = getConsoleReader(inputStream);
+    ConsoleReader reader = getConsoleReader(inputStream, null);
     return execute(reader, false);
   } finally {
     close();
@@ -754,10 +757,10 @@ int runInit() {
   }
 
   private int executeFile(String fileName) {
-    FileInputStream initStream = null;
+    BOMInputStream initStream = null;
     try {
-      initStream = new FileInputStream(fileName);
-      return execute(getConsoleReader(initStream), true);
+      initStream = FileUtils.getBOMInputStream(new FileInputStream(fileName));
+      return execute(getConsoleReader(initStream, initStream.getBOMCharsetName()), true);
     } catch (Throwable t) {
       handleException(t);
       return ERRNO_OTHER;
@@ -788,10 +791,11 @@ public void close() {
     commands.closeall(null);
   }
 
-  public ConsoleReader getConsoleReader(InputStream inputStream) throws IOException {
+  public ConsoleReader getConsoleReader(InputStream inputStream, String charset) throws IOException {
     if (inputStream != null) {
       // ### NOTE: fix for sf.net bug 879425.
-      consoleReader = new ConsoleReader(inputStream, new PrintWriter(getOutputStream(), true));
+      PrintWriter writer = new PrintWriter(getOutputStream(), true);
+      consoleReader = new ConsoleReader(inputStream, writer, null, getTerminal(charset));
     } else {
       consoleReader = new ConsoleReader();
     }
@@ -827,7 +831,7 @@ public ConsoleReader getConsoleReader(InputStream inputStream) throws IOExceptio
       handleException(e);
     }
 
-    if (inputStream instanceof FileInputStream) {
+    if (inputStream instanceof FileInputStream || inputStream instanceof BOMInputStream) {
       // from script.. no need to load history and no need of completor, either
       return consoleReader;
     }
@@ -843,6 +847,19 @@ public ConsoleReader getConsoleReader(InputStream inputStream) throws IOExceptio
     return consoleReader;
   }
 
+  // override system encoding
+  private Terminal getTerminal(String charset) {
+    String system = System.getProperty("input.encoding");
+    if (charset != null && !charset.equals(system)) {
+      System.setProperty("input.encoding", charset);
+    }
+    Terminal terminal = Terminal.getTerminal();
+    if (charset != null && !charset.equals(system)) {
+      System.setProperty("input.encoding", system);
+    }
+    return terminal;
+  }
+
   void usage() {
     output(loc("cmd-usage"));
diff --git cli/pom.xml cli/pom.xml
index 345f124..e871cbc 100644
--- cli/pom.xml
+++ cli/pom.xml
@@ -84,7 +84,6 @@
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
       <version>${commons-io.version}</version>
-      <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>jline</groupId>
diff --git cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index 17ef2db..9ac1b14 100644
--- cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -41,12 +41,14 @@
 import jline.History;
 import jline.SimpleCompletor;
 
+import org.apache.commons.io.input.BOMInputStream;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
 import org.apache.hadoop.hive.common.HiveInterruptUtils;
 import org.apache.hadoop.hive.common.LogUtils;
 import org.apache.hadoop.hive.common.LogUtils.LogInitializationException;
@@ -450,13 +452,14 @@ public int processFile(String fileName) throws IOException {
     } else {
       fs = FileSystem.get(path.toUri(), conf);
     }
-    BufferedReader bufferReader = null;
     int rc = 0;
+    BOMInputStream input = FileUtils.getBOMInputStream(fs.open(path));
     try {
-      bufferReader = new BufferedReader(new InputStreamReader(fs.open(path)));
+      BufferedReader bufferReader = new BufferedReader(
+          new InputStreamReader(input, input.getBOMCharsetName()));
       rc = processReader(bufferReader);
     } finally {
-      IOUtils.closeStream(bufferReader);
+      IOUtils.closeStream(input);
     }
     return rc;
   }
diff --git common/pom.xml common/pom.xml
index ad9f6c0..5ab70c2 100644
--- common/pom.xml
+++ common/pom.xml
@@ -46,6 +46,11 @@
       <version>${commons-cli.version}</version>
     </dependency>
     <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+      <version>${commons-io.version}</version>
+    </dependency>
+    <dependency>
       <groupId>commons-lang</groupId>
       <artifactId>commons-lang</artifactId>
       <version>${commons-lang.version}</version>
diff --git common/src/java/org/apache/hadoop/hive/common/FileUtils.java common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index f71bc3c..79b9902 100644
--- common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -20,26 +20,26 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.security.AccessControlException;
 import java.security.PrivilegedExceptionAction;
-import java.util.ArrayList;
 import java.util.BitSet;
 import java.util.List;
 
+import org.apache.commons.io.ByteOrderMark;
+import org.apache.commons.io.input.BOMInputStream;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsAction;
-import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.HadoopShims.HdfsFileStatus;
@@ -628,4 +628,22 @@ public static boolean equalsFileSystem(FileSystem fs1, FileSystem fs2) {
     //Once equality has been added in HDFS-4321, we should make use of it
     return fs1.getUri().equals(fs2.getUri());
   }
+
+  private static ByteOrderMark[] TARGET_BOMS;
+
+  public static BOMInputStream getBOMInputStream(InputStream input) throws IOException {
+    if (TARGET_BOMS == null) {
+      String bisVersion = BOMInputStream.class.getPackage().getSpecificationVersion();
+      if ("2.2".compareTo(bisVersion) > 0) {
+        // if commons-io library version is lower than 2.2, there is no UTF_32LE and UTF_32BE.
+        TARGET_BOMS = new ByteOrderMark[] {
+            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE};
+      } else {
+        TARGET_BOMS = new ByteOrderMark[] {
+            ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
+            ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE};
+      }
+    }
+    return new BOMInputStream(input, TARGET_BOMS);
+  }
 }
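
For reference, a minimal sketch (not part of the patch itself) of how commons-io's BOMInputStream, which the new FileUtils.getBOMInputStream helper wraps, detects and strips a byte-order mark and reports the matching charset through getBOMCharsetName(). The class name BomReadExample and the sample bytes are illustrative assumptions; getBOMCharsetName() returns null when no BOM is present, so the sketch falls back to UTF-8 in that case.

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

public class BomReadExample {
  public static void main(String[] args) throws IOException {
    // A UTF-8 encoded script that starts with the EF BB BF byte-order mark,
    // as Windows editors commonly write it.
    byte[] script = "\uFEFFshow tables;".getBytes("UTF-8");

    // Wrap the raw stream; BOMInputStream consumes the BOM bytes (if present)
    // and remembers which of the listed marks it matched.
    BOMInputStream bomIn = new BOMInputStream(new ByteArrayInputStream(script),
        ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE);

    // getBOMCharsetName() returns the charset of the detected BOM, or null
    // when the input has no BOM, so fall back to a default in that case.
    String charset = bomIn.getBOMCharsetName();
    if (charset == null) {
      charset = "UTF-8";
    }

    BufferedReader reader = new BufferedReader(new InputStreamReader(bomIn, charset));
    System.out.println(reader.readLine()); // prints "show tables;" with the BOM stripped
    reader.close();
  }
}

BeeLine and CliDriver rely on the same two calls: wrap the script's InputStream, then feed getBOMCharsetName() into the reader (or the jline ConsoleReader) so the file's own encoding takes precedence over the platform default.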