diff --git hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
index 8984503..a1522ba 100644
--- hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
+++ hcatalog/src/test/e2e/templeton/deployers/config/webhcat/webhcat-site.xml
@@ -35,7 +35,7 @@
templeton.libjars
- ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.5.jar
+ ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar
Jars to add to the classpath.
@@ -69,6 +69,19 @@
shipped to the target node in the cluster to execute Pig job which uses
HCat, Hive query, etc.
+
+
+ templeton.hive.extra.files
+ ${env.TEZ_CLIENT_HOME}/conf/tez-site.xml,${env.TEZ_CLIENT_HOME}/,${env.TEZ_CLIENT_HOME}/lib
+ The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH
+ before launching 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized
+ and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath processing does not recurse into subdirectories,
+ the paths in this property may be overlapping. In the example above, "./tez-site.xml:./tez-client/*:./lib/*" will be added to
+ HADOOP_CLASSPATH.
+ This can be used to specify config files, Tez artifacts, etc. This will be sent as the -files option of the hadoop jar command, thus
+ each path is interpreted by Generic Option Parser. It can be a local or hdfs path.
+
+
templeton.hcat.home
apache-hive-${env.HIVE_VERSION}-bin.tar.gz/apache-hive-${env.HIVE_VERSION}-bin/hcatalog
@@ -101,7 +114,7 @@
-
+
templeton.hive.properties
hive.metastore.uris=thrift://localhost:9933,hive.metastore.sasl.enabled=false
diff --git hcatalog/src/test/e2e/templeton/deployers/env.sh hcatalog/src/test/e2e/templeton/deployers/env.sh
index e2dd952..958ced8 100755
--- hcatalog/src/test/e2e/templeton/deployers/env.sh
+++ hcatalog/src/test/e2e/templeton/deployers/env.sh
@@ -36,6 +36,10 @@ if [ -z ${PIG_VERSION} ]; then
export PIG_VERSION=0.12.2-SNAPSHOT
fi
+if [ -z ${TEZ_VERSION} ]; then
+ export TEZ_VERSION=0.5.3
+fi
+
#Root of project source tree
if [ -z ${PROJ_HOME} ]; then
export PROJ_HOME=/Users/${USER}/dev/hive
@@ -46,6 +50,7 @@ if [ -z ${HADOOP_HOME} ]; then
export HADOOP_HOME=/Users/${USER}/dev/hwxhadoop/hadoop-dist/target/hadoop-${HADOOP_VERSION}
fi
+export TEZ_CLIENT_HOME=/Users/${USER}/dev/apache-tez-client-${TEZ_VERSION}
#Make sure Pig is built for the Hadoop version you are running
export PIG_TAR_PATH=/Users/${USER}/dev/pig-${PIG_VERSION}-src/build
#this is part of Pig distribution
diff --git hcatalog/webhcat/svr/src/main/config/webhcat-default.xml hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
index 5344a0f..d9d0270 100644
--- hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
+++ hcatalog/webhcat/svr/src/main/config/webhcat-default.xml
@@ -39,7 +39,7 @@
templeton.libjars
- ${env.TEMPLETON_HOME}/share/webhcat/svr/lib/zookeeper-3.4.3.jar
+ ${env.TEMPLETON_HOME}/../lib/zookeeper-3.4.6.jar,${env.TEMPLETON_HOME}/../lib/hive-common-1.2.0-SNAPSHOT.jar
Jars to add to the classpath.
@@ -106,7 +106,20 @@
templeton.hive.path
hive-0.11.0.tar.gz/hive-0.11.0/bin/hive
- The path to the Hive executable.
+ The path to the Hive executable. Applies only if templeton.hive.archive is defined.
+
+
+
+ templeton.hive.extra.files
+ /tez-client/conf/tez-site.xml,/tez-client/,/tez-client/lib
+ The resources in this list will be localized to the node running LaunchMapper and added to HADOOP_CLASSPATH
+ before launching 'hive' command. If the path /foo/bar is a directory, the contents of the entire dir will be localized
+ and ./bar/* will be added to HADOOP_CLASSPATH. Note that since classpath processing does not recurse into subdirectories,
+ the paths in this property may be overlapping. In the example above, "./tez-site.xml:./tez-client/*:./lib/*" will be added to
+ HADOOP_CLASSPATH.
+ This can be used to specify config files, Tez artifacts, etc. This will be sent as the -files option of the hadoop jar command, thus
+ each path is interpreted by Generic Option Parser. It can be a local or hdfs path.
+
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
index 37ce997..1b3ce68 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/AppConfig.java
@@ -149,6 +149,11 @@
public static final String HADOOP_CHILD_JAVA_OPTS = "mapred.child.java.opts";
public static final String HADOOP_MAP_MEMORY_MB = "mapreduce.map.memory.mb";
public static final String UNIT_TEST_MODE = "templeton.unit.test.mode";
+ /**
+ * comma-separated list of artifacts to add to HADOOP_CLASSPATH env var in
+ * LaunchMapper before launching Hive command
+ */
+ public static final String HIVE_EXTRA_FILES = "templeton.hive.extra.files";
private static final Log LOG = LogFactory.getLog(AppConfig.class);
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
index 12ad517..4e52d7b 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/HiveDelegator.java
@@ -27,6 +27,7 @@
import java.util.Map;
import org.apache.commons.exec.ExecuteException;
+import org.apache.hadoop.fs.Path;
import org.apache.hive.hcatalog.templeton.tool.JobSubmissionConstants;
import org.apache.hive.hcatalog.templeton.tool.TempletonControllerJob;
import org.apache.hive.hcatalog.templeton.tool.TempletonUtils;
@@ -117,7 +118,7 @@ public EnqueueBean run(String user, Map userArgs,
private List makeBasicArgs(String execute, String srcFile, String otherFiles,
String statusdir, String completedUrl,
boolean enablelog)
- throws URISyntaxException, FileNotFoundException, IOException,
+ throws URISyntaxException, IOException,
InterruptedException
{
ArrayList args = new ArrayList();
@@ -142,6 +143,30 @@ public EnqueueBean run(String user, Map userArgs,
args.add(appConf.hiveArchive());
}
+ //ship additional artifacts, for example for Tez
+ String extras = appConf.get(AppConfig.HIVE_EXTRA_FILES);
+ if(extras != null && extras.length() > 0) {
+ boolean foundLibjars = false;
+ for(int i = 0; i < args.size(); i++) {
+ if(FILES.equals(args.get(i))) {
+ String value = args.get(i + 1);
+ args.set(i + 1, value + "," + extras);
+ foundLibjars = true;
+ }
+ }
+ if(!foundLibjars) {
+ args.add(FILES);
+ args.add(extras);
+ }
+ String[] extraFiles = appConf.getStrings(AppConfig.HIVE_EXTRA_FILES);
+ StringBuilder extraFileNames = new StringBuilder();
+ //now tell LaunchMapper which files it should add to HADOOP_CLASSPATH
+ for(String file : extraFiles) {
+ Path p = new Path(file);
+ extraFileNames.append(p.getName()).append(",");
+ }
+ addDef(args, JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS, extraFileNames.toString());
+ }
return args;
}
}
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
index cd20c26..71c328d 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/TempletonDelegator.java
@@ -28,6 +28,10 @@
* http://hadoop.apache.org/docs/r1.0.4/commands_manual.html#Generic+Options
*/
public static final String ARCHIVES = "-archives";
+ /**
+ * http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/CommandsManual.html#Generic_Options
+ */
+ public static final String FILES = "-files";
protected AppConfig appConf;
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
index 1252105..1d560b6 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/JobSubmissionConstants.java
@@ -31,6 +31,12 @@
public static final String EXIT_FNAME = "exit";
public static final int WATCHER_TIMEOUT_SECS = 10;
public static final int KEEP_ALIVE_MSEC = 60 * 1000;
+ /**
+ * A comma-separated list of files to be added to HADOOP_CLASSPATH in
+ * {@link org.apache.hive.hcatalog.templeton.tool.LaunchMapper}. Used to localize additional
+ * artifacts for job submission requests.
+ */
+ public static final String HADOOP_CLASSPATH_EXTRAS = "templeton.hadoop.classpath.extras";
/*
* The = sign in the string for TOKEN_FILE_ARG_PLACEHOLDER is required because
* org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows() prepares
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
index c306841..2d1af4b 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/LaunchMapper.java
@@ -21,6 +21,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
@@ -33,7 +34,6 @@
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.StringUtils;
-import org.apache.hive.hcatalog.templeton.AppConfig;
import org.apache.hive.hcatalog.templeton.BadParam;
import org.apache.hive.hcatalog.templeton.LauncherDelegator;
@@ -115,6 +115,32 @@ private static void handleSqoop(Configuration conf, Map env) thr
}
}
}
+ /** Appends the localized HADOOP_CLASSPATH_EXTRAS entries (a directory becomes "dir/*") to HADOOP_CLASSPATH in env. */
+ private static void handleHadoopClasspathExtras(Configuration conf, Map env) throws IOException {
+   String[] files = conf.getStrings(JobSubmissionConstants.HADOOP_CLASSPATH_EXTRAS);
+   if(files == null || files.length == 0) {
+     return;//property is unset or holds no entries (e.g. only separators)
+   }
+   LOG.debug(HADOOP_CLASSPATH_EXTRAS + "=" + conf.get(HADOOP_CLASSPATH_EXTRAS));
+   StringBuilder paths = new StringBuilder();
+   FileSystem fs = FileSystem.getLocal(conf);//these have been localized already
+   for(String f : files) {
+     if(paths.length() > 0) {
+       paths.append(File.pathSeparator);
+     }
+     paths.append(f);
+     if(fs.getFileStatus(new Path(f)).isDirectory()) {
+       paths.append(File.separator).append("*");//wildcard picks up the directory's direct children
+     }
+   }
+   //NOTE(review): an earlier handler may already have put HADOOP_CLASSPATH into env; extend that value
+   //instead of replacing it with the inherited one - confirm against TempletonUtils.hadoopUserEnv
+   String current = (String)env.get("HADOOP_CLASSPATH");
+   if(!TempletonUtils.isset(current)) {
+     current = System.getenv("HADOOP_CLASSPATH");
+   }
+   env.put("HADOOP_CLASSPATH", TempletonUtils.isset(current) ? current + File.pathSeparator + paths : paths.toString());
+ }
protected Process startJob(Context context, String user, String overrideClasspath)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
@@ -135,6 +161,7 @@ protected Process startJob(Context context, String user, String overrideClasspat
Map env = TempletonUtils.hadoopUserEnv(user, overrideClasspath);
handlePigEnvVars(conf, env);
handleSqoop(conf, env);
+ handleHadoopClasspathExtras(conf, env);
List jarArgsList = new LinkedList(Arrays.asList(jarArgs));
handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER, "mapreduce.job.credentials.binary");
handleTokenFile(jarArgsList, JobSubmissionConstants.TOKEN_FILE_ARG_PLACEHOLDER_TEZ, "tez.credentials.path");
diff --git hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
index 892d79e..0062db0 100644
--- hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
+++ hcatalog/webhcat/svr/src/main/java/org/apache/hive/hcatalog/templeton/tool/TrivialExecService.java
@@ -79,29 +79,29 @@ private static void logDebugInfo(String msg, Map props) {
* Print files and directories in current directory. Will list files in the sub-directory (only 1 level deep)
* time honored tradition in WebHCat of borrowing from Oozie
*/
- private static void printContentsOfDir(String dir) {
+ private static StringBuilder printContentsOfDir(String dir, int depth, StringBuilder sb) {
+ StringBuilder indent = new StringBuilder();
+ for(int i = 0; i < depth; i++) {
+ indent.append("--");
+ }
File folder = new File(dir);
- StringBuilder sb = new StringBuilder("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n');
+ sb.append(indent).append("Files in '").append(dir).append("' dir:").append(folder.getAbsolutePath()).append('\n');
File[] listOfFiles = folder.listFiles();
+ if(listOfFiles == null) {
+ return sb;
+ }
for (File fileName : listOfFiles) {
if (fileName.isFile()) {
- sb.append("File: ").append(fileName.getName()).append('\n');
+ sb.append(indent).append("File: ").append(fileName.getName()).append('\n');
}
else if (fileName.isDirectory()) {
- sb.append("Dir: ").append(fileName.getName()).append('\n');
- File subDir = new File(fileName.getName());
- File[] moreFiles = subDir.listFiles();
- for (File subFileName : moreFiles) {
- if (subFileName.isFile()) {
- sb.append("--File: ").append(subFileName.getName()).append('\n');
- }
- else if (subFileName.isDirectory()) {
- sb.append("--Dir: ").append(subFileName.getName()).append('\n');
- }
- }
+ printContentsOfDir(fileName.getPath(), depth+1, sb);//getPath keeps the parent prefix so nested dirs resolve
}
}
- LOG.info(sb.toString());
+ return sb;
+ }
+ private static void printContentsOfDir(String dir) {
+ LOG.info(printContentsOfDir(dir, 0, new StringBuilder()).toString());
}
}