diff --git a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
index aeced48..659c84a 100644
--- a/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
+++ b/cli/src/java/org/apache/hadoop/hive/cli/CliDriver.java
@@ -712,7 +712,7 @@ private int executeDriver(CliSessionState ss, HiveConf conf, OptionsProcessor o
     ClassLoader loader = conf.getClassLoader();
     String auxJars = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEAUXJARS);
     if (StringUtils.isNotBlank(auxJars)) {
-      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
+      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","), conf);
     }
     conf.setClassLoader(loader);
     Thread.currentThread().setContextClassLoader(loader);
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index c1f8842..7319471 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -192,7 +192,7 @@ public static String makeListBucketingDirName(List<String> lbCols, List<String>
         '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019',
         '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F', '"', '#',
         '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{',
-        '[', ']', '^'};
+        '[', ']', '^', ' ', '<', '>', '|'};
 
     for (char c : clist) {
       charToEscape.set(c);
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b5a6401..b404603 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1128,6 +1128,9 @@ private void initialize(Class<?> cls) {
       hiveJar = this.get(ConfVars.HIVEJAR.varname);
     }
 
+    if (Shell.WINDOWS && hiveJar != null && hiveJar.startsWith("/")) {
+      hiveJar = hiveJar.substring(1);
+    }
     if (auxJars == null) {
       auxJars = this.get(ConfVars.HIVEAUXJARS.varname);
     }
diff --git a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
index f64b164..475ff4e 100644
--- a/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
+++ b/common/src/test/org/apache/hadoop/hive/conf/TestHiveConf.java
@@ -35,7 +35,7 @@ public void testHiveSitePath() throws Exception {
     String expectedPath = new Path(System.getProperty("test.build.resources") +
         "/hive-site.xml").toUri().getPath();
-    assertEquals(expectedPath, new HiveConf().getHiveSiteLocation().getPath());
+    assertEquals(expectedPath.toLowerCase(), (new HiveConf().getHiveSiteLocation().getPath()).toLowerCase());
   }
 
   private void checkHadoopConf(String name, String expectedHadoopVal) throws Exception {
diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 09deecf..510e2f7 100644
--- a/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/metastore/src/test/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -256,10 +256,10 @@ public static void partitionTester(HiveMetaStoreClient client, HiveConf hiveConf
     }
     assertTrue("Partitions are not same", part.equals(part_get));
 
-    String partName = "ds=2008-07-01 14%3A13%3A12/hr=14";
-    String part2Name = "ds=2008-07-01 14%3A13%3A12/hr=15";
-    String part3Name = "ds=2008-07-02 14%3A13%3A12/hr=15";
-    String part4Name = "ds=2008-07-03 14%3A13%3A12/hr=151";
+    String partName = "ds=2008-07-01%2014%3A13%3A12/hr=14";
+    String part2Name = "ds=2008-07-01%2014%3A13%3A12/hr=15";
+    String part3Name = "ds=2008-07-02%2014%3A13%3A12/hr=15";
+    String part4Name = "ds=2008-07-03%2014%3A13%3A12/hr=151";
 
     part_get = client.getPartition(dbName, tblName, partName);
     assertTrue("Partitions are not the same", part.equals(part_get));
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java
index 38d97e3..3e05ea7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/CopyTask.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.exec;
 
 import java.io.Serializable;
+import java.net.URLDecoder;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -51,8 +52,8 @@ public int execute(DriverContext driverContext) {
     FileSystem dstFs = null;
     Path toPath = null;
     try {
-      Path fromPath = new Path(work.getFromPath());
-      toPath = new Path(work.getToPath());
+      Path fromPath = new Path(URLDecoder.decode(work.getFromPath()));
+      toPath = new Path(URLDecoder.decode(work.getToPath()));
 
       console.printInfo("Copying data from " + fromPath.toString(), " to "
           + toPath.toString());
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index cee95fd..3ba0b2b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -20,6 +20,7 @@
 
 import java.io.IOException;
 import java.io.Serializable;
+import java.net.URLDecoder;
 import java.security.AccessControlException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -203,8 +204,8 @@ public int execute(DriverContext driverContext) {
       // to appropriate locations
       LoadFileDesc lfd = work.getLoadFileWork();
       if (lfd != null) {
-        Path targetPath = new Path(lfd.getTargetDir());
-        Path sourcePath = new Path(lfd.getSourceDir());
+        Path targetPath = new Path(URLDecoder.decode(lfd.getTargetDir()));
+        Path sourcePath = new Path(URLDecoder.decode(lfd.getSourceDir()));
         moveFile(sourcePath, targetPath, lfd.getIsDfsDir());
       }
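Taken together, the FileUtils and Copy/MoveTask hunks above form an encode/decode pair: characters that are illegal in Windows file names (space, '<', '>', '|') are now percent-escaped when partition directory names are built, and the copy/move tasks decode the escaped paths before touching the file system. The following standalone sketch illustrates the round trip; the escape() helper is a hypothetical stand-in that handles only a subset of the escaped characters, not Hive's actual FileUtils code:

    import java.net.URLDecoder;

    public class EscapeRoundTrip {
      // Hypothetical stand-in for the BitSet-driven escaping in FileUtils.
      static String escape(String s) {
        StringBuilder sb = new StringBuilder();
        for (char c : s.toCharArray()) {
          if (" <>|:".indexOf(c) >= 0) {
            sb.append('%').append(String.format("%02X", (int) c));
          } else {
            sb.append(c);
          }
        }
        return sb.toString();
      }

      public static void main(String[] args) throws Exception {
        String dir = "ds=2008-07-01 14:13:12";
        String escaped = escape(dir);
        // Prints: ds=2008-07-01%2014%3A13%3A12 -> ds=2008-07-01 14:13:12
        System.out.println(escaped + " -> " + URLDecoder.decode(escaped, "UTF-8"));
      }
    }

The updated partition names in TestHiveMetaStore are exactly this escaped form, with the previously unescaped space now encoded as %20.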
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index edb55fa..a5b2a3b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -142,6 +142,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPOr;
+import org.apache.hadoop.hive.ql.util.DosToUnix;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.Serializer;
@@ -205,6 +206,49 @@ private Utilities() {
       .synchronizedMap(new HashMap());
 
   private static final Log LOG = LogFactory.getLog(Utilities.class.getName());
 
+  /**
+   * Returns true if the resource is on an external file system, i.e. anything
+   * other than the local file system.
+   */
+  public static boolean canDownloadResource(String value) {
+    // Resources on any external FileSystem can be downloaded; a resource that
+    // already exists on the local file system does not need to be downloaded.
+    String scheme = new Path(value).toUri().getScheme();
+    return (scheme != null) && !scheme.equalsIgnoreCase("file");
+  }
+
+  /**
+   * If the resource is not on the local file system, download it to
+   * DOWNLOADED_RESOURCES_DIR and return the path of the downloaded copy
+   * on the local file system.
+   */
+  public static String downloadResource(String value, Configuration conf, boolean convertToUnix) {
+    if (canDownloadResource(value)) {
+      File resourceDir = new File(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
+      String destinationName = new Path(value).getName();
+      File destinationFile = new File(resourceDir, destinationName);
+      if (resourceDir.exists() && !resourceDir.isDirectory()) {
+        throw new RuntimeException("The resource directory is not a directory, resourceDir is set to " + resourceDir);
+      }
+      if (!resourceDir.exists() && !resourceDir.mkdirs()) {
+        throw new RuntimeException("Couldn't create directory " + resourceDir);
+      }
+      try {
+        FileSystem fs = FileSystem.get(new URI(value), conf);
+        fs.copyToLocalFile(new Path(value), new Path(destinationFile.getCanonicalPath()));
+        value = destinationFile.getCanonicalPath();
+        if (convertToUnix && DosToUnix.isWindowsScript(destinationFile)) {
+          try {
+            DosToUnix.convertWindowsScriptToUnix(destinationFile);
+          } catch (Exception e) {
+            throw new RuntimeException("Caught exception while converting to unix line endings", e);
+          }
+        }
+      } catch (Exception e) {
+        throw new RuntimeException("Failed to read external resource " + value, e);
+      }
+    }
+    return value;
+  }
+
   public static void clearWork(Configuration conf) {
     Path mapPath = getPlanPath(conf, MAP_PLAN_NAME);
     Path reducePath = getPlanPath(conf, REDUCE_PLAN_NAME);
@@ -1617,11 +1661,30 @@ public static String getResourceFiles(Configuration conf, SessionState.ResourceT
 
   /**
    * Add new elements to the classpath.
+   * If an element is not on the local file system (e.g. on ASV or S3), it is
+   * downloaded first and then added to the classpath.
+   * @param newPaths
+   *          Array of classpath elements
+   */
+  public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths,
+      Configuration conf) throws Exception {
+    // Iterate through the jars, downloading any that are not local.
+    ArrayList<String> local = new ArrayList<String>();
+    for (String path : newPaths) {
+      // By default, do not convert to Unix line endings.
+      path = downloadResource(path, conf, false);
+      local.add(path);
+    }
+    return addLocalToClassPath(cloader, local.toArray(new String[0]));
+  }
+
+  /**
+   * Add new elements to the classpath.
    *
    * @param newPaths
    *          Array of classpath elements
    */
-  public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths) throws Exception {
+  public static ClassLoader addLocalToClassPath(ClassLoader cloader, String[] newPaths) throws Exception {
     URLClassLoader loader = (URLClassLoader) cloader;
     List<URL> curPath = Arrays.asList(loader.getURLs());
     ArrayList<URL> newPath = new ArrayList<URL>();
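For orientation, here is a minimal sketch of how a caller exercises the new overload, mirroring what CliDriver and ExecDriver now do with HIVEAUXJARS; the jar URIs are hypothetical:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.exec.Utilities;

    public class AuxJarExample {
      public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf();
        // One jar is already local; the other is on HDFS and will be copied
        // into DOWNLOADED_RESOURCES_DIR before being added to the classpath.
        String[] jars = {"file:///opt/hive/aux/local-udf.jar",
            "hdfs://namenode:8020/user/hive/aux/remote-udf.jar"};
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        loader = Utilities.addToClassPath(loader, jars, conf);
        Thread.currentThread().setContextClassLoader(loader);
        conf.setClassLoader(loader);
      }
    }

Splitting the method into addToClassPath (download-aware) and addLocalToClassPath (local-only) keeps the ADD JAR path in SessionState below, which downloads via add_resource first, from attempting a second download.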
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
index eb9205b..f37ec24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
@@ -698,10 +698,10 @@ public static void main(String[] args) throws IOException, HiveException {
     // see also - code in CliDriver.java
     ClassLoader loader = conf.getClassLoader();
     if (StringUtils.isNotBlank(auxJars)) {
-      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
+      loader = Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","), conf);
     }
     if (StringUtils.isNotBlank(addedJars)) {
-      loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ","));
+      loader = Utilities.addToClassPath(loader, StringUtils.split(addedJars, ","), conf);
     }
     conf.setClassLoader(loader);
     // Also set this to the Thread ContextClassLoader, so new threads will
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 1d6e6fe..0cb5a0e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -1902,7 +1902,7 @@ private String fetchFilesNotInLocalFilesystem(String cmd) {
     SessionState ss = SessionState.get();
     String progName = getScriptProgName(cmd);
 
-    if (SessionState.canDownloadResource(progName)) {
+    if (Utilities.canDownloadResource(progName)) {
       String filePath = ss.add_resource(ResourceType.FILE, progName, true);
       if (filePath == null) {
         throw new RuntimeException("Could not download the resource: " + progName);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 6888504..272c341 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -454,7 +454,8 @@ public static boolean registerJar(String newJar) {
     LogHelper console = getConsole();
     try {
       ClassLoader loader = Thread.currentThread().getContextClassLoader();
-      ClassLoader newLoader = Utilities.addToClassPath(loader, StringUtils.split(newJar, ","));
+      ClassLoader newLoader = Utilities.addLocalToClassPath(loader,
+          StringUtils.split(newJar, ","));
       Thread.currentThread().setContextClassLoader(newLoader);
       SessionState.get().getConf().setClassLoader(newLoader);
       console.printInfo("Added " + newJar + " to class path");
@@ -571,7 +572,7 @@ public String add_resource(ResourceType t, String value) {
 
   public String add_resource(ResourceType t, String value, boolean convertToUnix) {
     try {
-      value = downloadResource(value, convertToUnix);
+      value = Utilities.downloadResource(value, conf, convertToUnix);
     } catch (Exception e) {
       getConsole().printError(e.getMessage());
       return null;
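At the session level the same flow backs ADD FILE/ADD JAR. A sketch of the programmatic equivalent, assuming a session has already been started elsewhere and using a made-up HDFS URI:

    import org.apache.hadoop.hive.ql.session.SessionState;
    import org.apache.hadoop.hive.ql.session.SessionState.ResourceType;

    public class AddResourceExample {
      public static void main(String[] args) {
        SessionState ss = SessionState.get();  // assumes SessionState.start() ran
        // The remote script is copied to DOWNLOADED_RESOURCES_DIR; passing true
        // also converts Windows line endings if the file looks like a script.
        String localPath = ss.add_resource(ResourceType.FILE,
            "hdfs://namenode:8020/user/hive/scripts/transform.py", true);
        System.out.println("Resource available at: " + localPath);
      }
    }

On failure, add_resource prints the error to the console and returns null, which is why SemanticAnalyzer above checks the returned path before proceeding.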
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/WindowsPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/WindowsPathUtil.java
new file mode 100644
index 0000000..27dc333
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/WindowsPathUtil.java
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.util.Shell;
+
+public class WindowsPathUtil {
+
+  public static void convertPathsFromWindowsToHdfs(HiveConf conf) {
+    if (Shell.WINDOWS) {
+      String orgWarehouseDir = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
+      conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, getHdfsUriString(orgWarehouseDir));
+
+      String orgTestTempDir = System.getProperty("test.tmp.dir");
+      System.setProperty("test.tmp.dir", getHdfsUriString(orgTestTempDir));
+
+      String orgTestDataDir = System.getProperty("test.src.data.dir");
+      System.setProperty("test.src.data.dir", getHdfsUriString(orgTestDataDir));
+
+      String orgScratchDir = conf.getVar(HiveConf.ConfVars.SCRATCHDIR);
+      conf.setVar(HiveConf.ConfVars.SCRATCHDIR, getHdfsUriString(orgScratchDir));
+    }
+  }
+
+  private static String getHdfsUriString(String uriStr) {
+    assert uriStr != null;
+    if (Shell.WINDOWS) {
+      // When converting a Windows path for HDFS, replace '\' with '/'
+      // and strip the single drive letter and colon from the absolute path.
+      return uriStr.replace('\\', '/')
+          .replaceFirst("/[c-zC-Z]:", "/")
+          .replaceFirst("^[c-zC-Z]:", "");
+    }
+    return uriStr;
+  }
+}
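To make the conversion concrete, here is a mini-demo with the rewriting logic inlined (getHdfsUriString is private, and the Shell.WINDOWS guard is omitted); the sample paths are invented:

    public class PathConversionDemo {
      // Mirrors WindowsPathUtil.getHdfsUriString for illustration only.
      static String toHdfsUriString(String uriStr) {
        return uriStr.replace('\\', '/')
            .replaceFirst("/[c-zC-Z]:", "/")
            .replaceFirst("^[c-zC-Z]:", "");
      }

      public static void main(String[] args) {
        // Drive letter and backslashes are stripped from a Windows path.
        System.out.println(toHdfsUriString("C:\\tmp\\hive"));        // /tmp/hive
        // An already POSIX-style path passes through unchanged.
        System.out.println(toHdfsUriString("/user/hive/warehouse")); // /user/hive/warehouse
      }
    }

Note the character classes start at 'c', so paths on A: or B: drives are not rewritten.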
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
index 9588061..3cd39c4 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExecDriver.java
@@ -34,6 +34,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.WindowsPathUtil;
 import org.apache.hadoop.hive.ql.exec.mr.ExecDriver;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
@@ -79,6 +80,9 @@
     try {
       conf = new HiveConf(ExecDriver.class);
 
+      // Convert any Windows-style paths in the config to HDFS-compatible ones.
+      WindowsPathUtil.convertPathsFromWindowsToHdfs(conf);
+
       fs = FileSystem.get(conf);
 
       if (fs.exists(tmppath) && !fs.getFileStatus(tmppath).isDir()) {
         throw new RuntimeException(tmpdir + " exists but is not a directory");
@@ -161,7 +165,7 @@ private static void fileDiff(String datafile, String testdir) throws Exception {
     }
     FSDataInputStream fi_test = fs.open((fs.listStatus(di_test))[0].getPath());
 
-    if (!Utilities.contentsEqual(fi_gold, fi_test, false)) {
+    if (!Utilities.contentsEqual(fi_gold, fi_test, true)) {
       LOG.error(di_test.toString() + " does not match " + datafile);
       assertEquals(false, true);
     }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
index 6490b89..fb2284d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
+import org.apache.hadoop.hive.ql.WindowsPathUtil;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.thrift.TException;
@@ -60,6 +61,7 @@ protected void setUp() throws Exception {
     super.setUp();
     hive = Hive.get();
+    WindowsPathUtil.convertPathsFromWindowsToHdfs(hive.getConf());
     checker = new HiveMetaStoreChecker(hive);
 
     partCols = new ArrayList<String>();
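Finally, the contentsEqual flag flip: on Windows, gold files checked out with LF endings get compared against test output written with CRLF, so a byte-for-byte comparison fails spuriously. A sketch of the case the change addresses, assuming the third parameter of Utilities.contentsEqual is an ignore-whitespace flag (which is what the test change relies on):

    import java.io.ByteArrayInputStream;
    import java.io.InputStream;

    import org.apache.hadoop.hive.ql.exec.Utilities;

    public class LineEndingDemo {
      public static void main(String[] args) throws Exception {
        // Identical logical content, Unix vs. Windows line endings.
        InputStream gold = new ByteArrayInputStream("a\tb\nc\td\n".getBytes("UTF-8"));
        InputStream test = new ByteArrayInputStream("a\tb\r\nc\td\r\n".getBytes("UTF-8"));
        // With the flag set, the CR/LF difference no longer fails the comparison.
        System.out.println(Utilities.contentsEqual(gold, test, true));
      }
    }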