commit 3312a0ec94364a11b02428fb031c6197e986e635 Author: Bharath Krishna Date: Tue Apr 24 16:22:10 2018 -0700 HIVE-18958 : Fix Spark config warnings - Adding --remote-driver-conf to differentiate RemoteDriver-specific conf from SparkConf. - Also, removing the code that adds HIVE_SPARK_RSC_CONFIGS to SparkConf, as this is done in SparkClientImpl, hence avoiding the same thing being done twice. - Refactoring config keys to be static strings diff --git data/conf/spark/yarn-client/hive-site.xml data/conf/spark/yarn-cluster/hive-site.xml similarity index 98% rename from data/conf/spark/yarn-client/hive-site.xml rename to data/conf/spark/yarn-cluster/hive-site.xml index 6c633623abdf5d6b0abec50f7b2646ce8c74761d..265e398b01c62d52c325eaee3929006219de45f3 100644 --- data/conf/spark/yarn-client/hive-site.xml +++ data/conf/spark/yarn-cluster/hive-site.xml @@ -245,7 +245,7 @@ - spark.yarn.executor.memoryOverhead + spark.executor.memoryOverhead 0 @@ -255,7 +255,7 @@ - spark.yarn.driver.memoryOverhead + spark.driver.memoryOverhead 0 diff --git itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java index 8ee7fb9693be85af40d068e666e5024bf4bdc8a1..6628336807b06cab49063673be0d8e9c5b5a7101 100644 --- itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java +++ itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CliConfigs.java @@ -579,7 +579,7 @@ public SparkOnYarnCliConfig() { setInitScript("q_test_init.sql"); setCleanupScript("q_test_cleanup.sql"); - setHiveConfDir("data/conf/spark/yarn-client"); + setHiveConfDir("data/conf/spark/yarn-cluster"); setClusterType(MiniClusterType.miniSparkOnYarn); } catch (Exception e) { throw new RuntimeException("can't construct cliconfig", e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index 
c22fb8923d90d135cf5e55221c6345643ef4a26a..565c43b9c29faa9d40ddd01899276080a25d819e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; -import org.apache.hive.spark.client.rpc.RpcConfiguration; import org.apache.spark.SparkConf; import com.google.common.base.Joiner; @@ -199,13 +198,6 @@ public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String se "Pass Oozie configuration (%s -> %s).", propertyName, LogUtils.maskIfPassword(propertyName,value))); } - if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) { - String value = RpcConfiguration.getValue(hiveConf, propertyName); - sparkConf.put(propertyName, value); - LOG.debug(String.format( - "load RPC property from hive configuration (%s -> %s).", - propertyName, LogUtils.maskIfPassword(propertyName,value))); - } } final boolean optShuffleSerDe = hiveConf.getBoolVar( diff --git spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java index 255c30583a9549cf07f53ad4f520fe29dde38436..6e546d4548b0015ba6a0691dcee05e3be33d5bc1 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java +++ spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java @@ -90,6 +90,10 @@ private volatile JobContextImpl jc; private volatile boolean running; + public static final String REMOTE_DRIVER_HOST_CONF = "--remote-host"; + public static final String REMOTE_DRIVER_PORT_CONF = "--remote-port"; + public static final String REMOTE_DRIVER_CONF = "--remote-driver-conf"; + private RemoteDriver(String[] args) throws Exception { this.activeJobs = Maps.newConcurrentMap(); this.jcLock = new Object(); @@ -101,22 +105,20 @@ private 
RemoteDriver(String[] args) throws Exception { SparkConf conf = new SparkConf(); String serverAddress = null; int serverPort = -1; + Map mapConf = Maps.newHashMap(); for (int idx = 0; idx < args.length; idx += 2) { String key = args[idx]; - if (key.equals("--remote-host")) { + if (REMOTE_DRIVER_HOST_CONF.equals(key)) { serverAddress = getArg(args, idx); - } else if (key.equals("--remote-port")) { + } else if (REMOTE_DRIVER_PORT_CONF.equals(key)) { serverPort = Integer.parseInt(getArg(args, idx)); - } else if (key.equals("--client-id")) { - conf.set(SparkClientFactory.CONF_CLIENT_ID, getArg(args, idx)); - } else if (key.equals("--secret")) { - conf.set(SparkClientFactory.CONF_KEY_SECRET, getArg(args, idx)); - } else if (key.equals("--conf")) { + } else if (REMOTE_DRIVER_CONF.equals(key)) { String[] val = getArg(args, idx).split("[=]", 2); - conf.set(val[0], val[1]); + //set these only in mapConf and not in SparkConf, + // as these are non-spark specific configs used by the remote driver + mapConf.put(val[0], val[1]); } else { - throw new IllegalArgumentException("Invalid command line: " - + Joiner.on(" ").join(args)); + throw new IllegalArgumentException("Invalid command line: " + Joiner.on(" ").join(args)); } } @@ -124,7 +126,6 @@ private RemoteDriver(String[] args) throws Exception { LOG.info("Connecting to: {}:{}", serverAddress, serverPort); - Map mapConf = Maps.newHashMap(); for (Tuple2 e : conf.getAll()) { mapConf.put(e._1(), e._2()); LOG.debug("Remote Driver configured with: " + e._1() + "=" + e._2()); diff --git spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index e4f72a35312e11079125cd40b8bfbe7e8947b4f2..d4505153599e1b83778efc31c8e7aa09c2b33f7a 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -430,16 +430,17 @@ private Thread 
startDriver(final RpcServer rpcServer, final String clientId, fin } argv.add(jar); - argv.add("--remote-host"); + argv.add(RemoteDriver.REMOTE_DRIVER_HOST_CONF); argv.add(serverAddress); - argv.add("--remote-port"); + argv.add(RemoteDriver.REMOTE_DRIVER_PORT_CONF); argv.add(serverPort); - //hive.spark.* keys are passed down to the RemoteDriver via --conf, + //hive.spark.* keys are passed down to the RemoteDriver via REMOTE_DRIVER_CONF + // so that they are not used in sparkContext but only in remote driver, //as --properties-file contains the spark.* keys that are meant for SparkConf object. for (String hiveSparkConfKey : RpcConfiguration.HIVE_SPARK_RSC_CONFIGS) { String value = RpcConfiguration.getValue(hiveConf, hiveSparkConfKey); - argv.add("--conf"); + argv.add(RemoteDriver.REMOTE_DRIVER_CONF); argv.add(String.format("%s=%s", hiveSparkConfKey, value)); }