diff --git hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml index 8984503..a1522ba 100644 --- hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml +++ hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml @@ -35,7 +35,7 @@ templeton.libjars - ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar + ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar Jars to add to the classpath. @@ -69,6 +69,19 @@ shipped to the target node in the cluster to execute Pig job which uses HCat, Hive query, etc. + + + templeton.hive.extra.files + ${env.TEZ_CLIENT_HOME}/conf/tez-site.xml,${env.TEZ_CLIENT_HOME}/,${env.TEZ_CLIENT_HOME}/lib + The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH + before launching 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized + and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath processing does not recurse into subdirectories, + the paths in this property may be overlapping. In the example above, "./tez-site.xml:./tez-client/*:./lib/*" will be added to + HADOOP_CLASSPATH. + This can be used to specify config files, Tez artifacts, etc. This will be sent as the -files option of the hadoop jar command, thus + each path is interpreted by Generic Option Parser. It can be local or hdfs path. 
+ + templeton.hcat.home apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog @@ -101,7 +114,7 @@ - + templeton.hive.properties hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false
diff --git hcatalog/src/test/e2e/templeton/deployers/env.sh hcatalog/src/test/e2e/templeton/deployers/env.sh
index e2dd952..958ced8 100755
--- hcatalog/src/test/e2e/templeton/deployers/env.sh
+++ hcatalog/src/test/e2e/templeton/deployers/env.sh
@@ -36,6 +36,10 @@ if [ -z ${PIG_VERSION} ]; then
   export PIG_VERSION=0.12.2-SNAPSHOT
 fi
 
+if [ -z ${TEZ_VERSION} ]; then
+  export TEZ_VERSION=0.5.3
+fi
+
 #Root of project source tree
 if [ -z ${PROJ_HOME} ]; then
   export PROJ_HOME=/Users/${USER}/dev/hive
@@ -46,6 +50,10 @@ if [ -z ${HADOOP_HOME} ]; then
   export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION}
 fi
 
+#Tez client install dir; webhcat-site.xml reads conf/tez-site.xml and lib/ from under it
+if [ -z ${TEZ_CLIENT_HOME} ]; then
+  export TEZ_CLIENT_HOME=/Users/${USER}/dev/apache-tez-client-${TEZ_VERSION}
+fi
 #Make sure Pig is built for the Hadoop version you are running
 export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build
 #this is part of Pig distribution
diff --git hcatalog/webhcat/svr/src/main/config/webhcat-default.xml hcatalog/webhcat/svr/src/main/config/webhcat-default.xml index 5344a0f..d9d0270 100644 --- hcatalog/webhcat/svr/src/main/config/webhcat-default.xml +++ hcatalog/webhcat/svr/src/main/config/webhcat-default.xml @@ -39,7 +39,7 @@ templeton.libjars - ${env.TEMPLETON_HOME}/share/webhcat/svr/lib/zookeeper-3.4.3.jar + ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar Jars to add to the classpath. @@ -106,7 +106,20 @@ templeton.hive.path hive-0.11.0.tar.gz/hive-0.11.0/bin/hive - The path to the Hive executable. + The path to the Hive executable. Applies only if templeton.hive.archive is defined. 
+ + + + templeton.hive.extra.files + /tez-client/conf/tez-site.xml,/tez-client/,/tez-client/lib + The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH + before launching 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized + and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath processing does not recurse into subdirectories, + the paths in this property may be overlapping. In the example above, "./tez-site.xml:./tez-client/*:./lib/*" will be added to + HADOOP_CLASSPATH. + This can be used to specify config files, Tez artifacts, etc. This will be sent as the -files option of the hadoop jar command, thus + each path is interpreted by Generic Option Parser. It can be local or hdfs path. + diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java index 37ce997..1b3ce68 100644 --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java @@ -149,6 +149,11 @@ public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts"; public static final String HADOOP_MAP_MEMORY_MB = "mapreduce.map.memory.mb"; public static final String UNIT_TEST_MODE = "templeton.unit.test.mode"; + /** + * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in + * LaunchMapper before launching Hive command + */ + public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files"; private static final Log LOG = LogFactory.getLog(AppConfig.class); diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java index 12ad517..4e52d7b 100644 --- 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
@@ -27,6 +27,7 @@
 import java.util.Map;
 
 import org.apache.commons.exec.ExecuteException;
+import org.apache.hadoop.fs.Path;
 import org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants;
 import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
 import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
@@ -117,7 +118,7 @@ public EnqueueBean run(String user, Map userArgs,
   private List makeBasicArgs(String execute, String srcFile, String otherFiles,
                          String statusdir, String completedUrl,
                          boolean enablelog)
-    throws URISyntaxException, FileNotFoundException, IOException,
+    throws URISyntaxException, IOException,
                           InterruptedException
   {
     ArrayList args = new ArrayList();
@@ -142,6 +143,35 @@ public EnqueueBean run(String user, Map userArgs,
       args.add(appConf.hiveArchive());
     }
 
+    //ship additional artifacts, for example for Tez
+    String extras = appConf.get(AppConfig.HIVE_EXTRA_FILES);
+    if(extras != null && extras.length() > 0) {
+      boolean foundFiles = false;
+      //a "-files" flag is only usable if a value follows it, so stop one short of the end
+      for(int i = 0; i < args.size() - 1; i++) {
+        if(FILES.equals(args.get(i))) {
+          //extend the existing "-files" value instead of passing the option twice
+          String value = args.get(i + 1);
+          args.set(i + 1, value + "," + extras);
+          foundFiles = true;
+        }
+      }
+      if(!foundFiles) {
+        args.add(FILES);
+        args.add(extras);
+      }
+      String[] extraFiles = appConf.getStrings(AppConfig.HIVE_EXTRA_FILES);
+      StringBuilder extraFileNames = new StringBuilder();
+      //now tell LaunchMapper which files it should add to HADOOP_CLASSPATH
+      for(String file : extraFiles) {
+        Path p = new Path(file);
+        if(extraFileNames.length() > 0) {
+          extraFileNames.append(",");
+        }
+        extraFileNames.append(p.getName());
+      }
+      addDef(args, JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS, extraFileNames.toString());
+    }
     return args;
   }
 }
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java 
hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java index cd20c26..71c328d 100644 --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java @@ -28,6 +28,10 @@ * http://hadoop.apache.org/docs/r1.0.4/commands_manual.html#Generic+Options */ public static final String ARCHIVES = "-archives"; + /** + * http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#Generic_Options + */ + public static final String FILES = "-files"; protected AppConfig appConf; diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java index 1252105..1d560b6 100644 --- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java +++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java @@ -31,6 +31,12 @@ public static final String EXIT_FNAME = "exit"; public static final int WATCHER_TIMEOUT_SECS = 10; public static final int KEEP_ALIVE_MSEC = 60 * 1000; + /** + * A comma-separated list of files to be added to HADOOP_CLASSPATH in + * {@link org.apache.hive.hcatalog.templeton.tool.LaunchMapper}. Used to localize additional + * artifacts for job submission requests. 
+   */
+  public static final String HADOOP_CLASSPATH_EXTRAS = "templeton.hadoop.classpath.extras";
   /*
    * The = sign in the string for TOKEN_FILE_ARG_PLACEHOLDER is required because
    * org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows() prepares
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
index c306841..2d1af4b 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
@@ -21,6 +21,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
@@ -33,7 +34,6 @@
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Shell;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.hive.hcatalog.templeton.AppConfig;
 import org.apache.hive.hcatalog.templeton.BadParam;
 import org.apache.hive.hcatalog.templeton.LauncherDelegator;
 
@@ -115,6 +115,44 @@ private static void handleSqoop(Configuration conf, Map env) thr
       }
     }
   }
+  /**
+   * Adds the artifacts listed in
+   * {@link JobSubmissionConstants#HADOOP_CLASSPATH_EXTRAS} to the HADOOP_CLASSPATH entry
+   * put into env.  A directory is added as "dir/*", anything else under its own name.
+   * Does nothing if the property is not set.
+   */
+  private static void handleHadoopClasspathExtras(Configuration conf, Map env)
+    throws IOException {
+    String extras = conf.get(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS);
+    if(!TempletonUtils.isset(extras)) {
+      return;
+    }
+    LOG.debug(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS + "=" + extras);
+    String[] files = conf.getStrings(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS);
+    if(files == null || files.length == 0) {
+      //value held nothing but separators (e.g. ","): there is nothing to add
+      return;
+    }
+    StringBuilder paths = new StringBuilder();
+    FileSystem fs = FileSystem.getLocal(conf);//these have been localized already
+    for(String f : files) {
+      Path p = new Path(f);
+      FileStatus fileStatus = fs.getFileStatus(p);
+      if(paths.length() > 0) {
+        paths.append(File.pathSeparator);
+      }
+      paths.append(f);
+      if(fileStatus.isDirectory()) {
+        paths.append(File.separator).append("*");
+      }
+    }
+    if(TempletonUtils.isset(System.getenv("HADOOP_CLASSPATH"))) {
+      env.put("HADOOP_CLASSPATH", System.getenv("HADOOP_CLASSPATH") + File.pathSeparator + paths);
+    }
+    else {
+      env.put("HADOOP_CLASSPATH", paths.toString());
+    }
+  }
   protected Process startJob(Context context, String user, String overrideClasspath)
     throws IOException, InterruptedException {
     Configuration conf = context.getConfiguration();
@@ -135,6 +173,7 @@ protected Process startJob(Context context, String user, String overrideClasspat
     Map env = TempletonUtils.hadoopUserEnv(user, overrideClasspath);
     handlePigEnvVars(conf, env);
     handleSqoop(conf, env);
+    handleHadoopClasspathExtras(conf, env);
     List jarArgsList = new LinkedList(Arrays.asList(jarArgs));
     handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER, "mapreduce.job.credentials.binary");
     handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER_TEZ, "tez.credentials.path");
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
index 892d79e..0062db0 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
@@ -79,29 +79,29 @@ private static void logDebugInfo(String msg, Map props) {
    * Print files and directories in current directory. 
Will list files in the sub-directory (only 1 level deep)
    * time honored tradition in WebHCat of borrowing from Oozie
    */
-  private static void printContentsOfDir(String dir) {
+  private static StringBuilder printContentsOfDir(String dir, int depth, StringBuilder sb) {
+    StringBuilder indent = new StringBuilder();//"--" per nesting level
+    for(int i = 0; i < depth; i++) {
+      indent.append("--");
+    }
     File folder = new File(dir);
-    StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n');
+    sb.append(indent).append("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n');
 
     File[] listOfFiles = folder.listFiles();
+    if(listOfFiles == null) {//dir is not a directory or could not be read
+      return sb;
+    }
     for (File fileName : listOfFiles) {
       if (fileName.isFile()) {
-        sb.append("File: ").append(fileName.getName()).append('\n');
+        sb.append(indent).append("File: ").append(fileName.getName()).append('\n');
       }
       else if (fileName.isDirectory()) {
-        sb.append("Dir: ").append(fileName.getName()).append('\n');
-        File subDir = new File(fileName.getName());
-        File[] moreFiles = subDir.listFiles();
-        for (File subFileName : moreFiles) {
-          if (subFileName.isFile()) {
-            sb.append("--File: ").append(subFileName.getName()).append('\n');
-          }
-          else if (subFileName.isDirectory()) {
-            sb.append("--Dir: ").append(subFileName.getName()).append('\n');
-          }
-        }
+        printContentsOfDir(fileName.getPath(), depth+1, sb);//getPath() keeps the parent prefix; getName() alone would resolve against the working dir
       }
     }
-    LOG.info(sb.toString());
+    return sb;
+  }
+  private static void printContentsOfDir(String dir) {
+    LOG.info(printContentsOfDir(dir, 0, new StringBuilder()).toString());
   }
 }