diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 1bc3a6e..7b43ed6 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -3278,6 +3278,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "hive.spark.use.groupby.shuffle", true, "Spark groupByKey transformation has better performance but uses unbounded memory." + "Turn this off when there is a memory issue."), + SPARK_LOG4J_FILE("hive.spark.log4j.file", "", "The log4j.properties file to use for the Hive-on-Spark driver and " + + "executors. If no file is specified, the default log4j.properties file from the Spark installation will be " + + "used."), NWAYJOINREORDER("hive.reorder.nway.joins", true, "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, diff --git a/data/conf/spark/standalone/hive-site.xml b/data/conf/spark/standalone/hive-site.xml index 989e65e..5fedc36 100644 --- a/data/conf/spark/standalone/hive-site.xml +++ b/data/conf/spark/standalone/hive-site.xml @@ -240,4 +240,9 @@ Internal marker for test. 
Used for masking env-dependent values + + hive.spark.log4j.file + ${hive.root}/data/conf/spark/log4j2.properties + + diff --git a/data/conf/spark/yarn-client/hive-site.xml b/data/conf/spark/yarn-client/hive-site.xml index 9cda40d..187ce31 100644 --- a/data/conf/spark/yarn-client/hive-site.xml +++ b/data/conf/spark/yarn-client/hive-site.xml @@ -285,4 +285,9 @@ 30000ms + + hive.spark.log4j.file + ${hive.root}/data/conf/spark/log4j2.properties + + diff --git a/itests/pom.xml b/itests/pom.xml index e5b54bf..b304b3d 100644 --- a/itests/pom.xml +++ b/itests/pom.xml @@ -100,7 +100,6 @@ } mkdir -p $DOWNLOAD_DIR download "http://d3jw87u4immizc.cloudfront.net/spark-tarball/spark-${spark.version}-bin-hadoop2-without-hive.tgz" "spark" - cp -f $HIVE_ROOT/data/conf/spark/log4j2.properties $BASE_DIR/spark/conf/ diff --git a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index d4b63f0..fe9f340 100644 --- a/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ b/spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -264,10 +264,26 @@ public void run() { osxTestOpts = Strings.nullToEmpty(System.getenv(OSX_TEST_OPTS)); } - String driverJavaOpts = Joiner.on(" ").skipNulls().join( - "-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(DRIVER_OPTS_KEY)); - String executorJavaOpts = Joiner.on(" ").skipNulls().join( - "-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(EXECUTOR_OPTS_KEY)); + // Build driver and executor java opts + StringBuilder driverJavaOptsBuilder = new StringBuilder(); + StringBuilder executorJavaOptsBuilder = new StringBuilder(); + + Joiner javaOptsJoiner = Joiner.on(" ").skipNulls(); + + javaOptsJoiner.appendTo(driverJavaOptsBuilder, + "-Dhive.spark.log.dir=" + sparkLogDir, osxTestOpts, conf.get(DRIVER_OPTS_KEY)); + javaOptsJoiner.appendTo(executorJavaOptsBuilder, + "-Dhive.spark.log.dir=" + 
sparkLogDir, osxTestOpts, conf.get(EXECUTOR_OPTS_KEY)); + + if (this.hiveConf.get(ConfVars.SPARK_LOG4J_FILE.varname) != null) { + driverJavaOptsBuilder.append(" -Dlog4j.configuration=") + .append(this.hiveConf.get(ConfVars.SPARK_LOG4J_FILE.varname)); + executorJavaOptsBuilder.append(" -Dlog4j.configuration=") + .append(this.hiveConf.get(ConfVars.SPARK_LOG4J_FILE.varname)); + } + + String driverJavaOpts = driverJavaOptsBuilder.toString(); + String executorJavaOpts = executorJavaOptsBuilder.toString(); // Create a file with all the job properties to be read by spark-submit. Change the // file's permissions so that only the owner can read it. This avoid having the @@ -376,6 +392,13 @@ public void run() { argv.add("org.apache.spark.deploy.SparkSubmit"); } + // If a custom log4j.properties file is specified for the driver and the executors, add it to the --files option
 + // of spark-submit so that it copies the file to the driver and the executors + if (this.hiveConf.get(ConfVars.SPARK_LOG4J_FILE.varname) != null) { + argv.add("--files"); + argv.add(this.hiveConf.get(ConfVars.SPARK_LOG4J_FILE.varname)); + } + if (SparkClientUtilities.isYarnClusterMode(master, deployMode)) { String executorCores = conf.get("spark.executor.cores"); if (executorCores != null) {