commit 36223dc9eefd1e27509dbc9483d2d9476ba3b7ea Author: Bharath Krishna Date: Tue Apr 24 16:22:10 2018 -0700 HIVE-18958 : Fix Spark config warnings - Adding --remote-driver-conf to differentiate RemoteDriver specific conf from SparkConf. - Also, removing the code that adds HIVE_SPARK_RSC_CONFIGS to SparkConf as this is done in SparkClientImpl, hence avoiding same thing done twice. diff --git data/conf/spark/yarn-client/hive-site.xml data/conf/spark/yarn-client/hive-site.xml index 6c633623abdf5d6b0abec50f7b2646ce8c74761d..265e398b01c62d52c325eaee3929006219de45f3 100644 --- data/conf/spark/yarn-client/hive-site.xml +++ data/conf/spark/yarn-client/hive-site.xml @@ -245,7 +245,7 @@ - spark.yarn.executor.memoryOverhead + spark.executor.memoryOverhead 0 @@ -255,7 +255,7 @@ - spark.yarn.driver.memoryOverhead + spark.driver.memoryOverhead 0 diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index c22fb8923d90d135cf5e55221c6345643ef4a26a..565c43b9c29faa9d40ddd01899276080a25d819e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -41,7 +41,6 @@ import org.apache.hadoop.hive.ql.io.HiveKey; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; -import org.apache.hive.spark.client.rpc.RpcConfiguration; import org.apache.spark.SparkConf; import com.google.common.base.Joiner; @@ -199,13 +198,6 @@ public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String se "Pass Oozie configuration (%s -> %s).", propertyName, LogUtils.maskIfPassword(propertyName,value))); } - if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) { - String value = RpcConfiguration.getValue(hiveConf, propertyName); - sparkConf.put(propertyName, value); - LOG.debug(String.format( - "load RPC property from hive configuration (%s -> %s).", - propertyName, LogUtils.maskIfPassword(propertyName,value))); - } } final boolean optShuffleSerDe = hiveConf.getBoolVar( diff --git spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java index 255c30583a9549cf07f53ad4f520fe29dde38436..47c4add38096ed6506f7da6e1fad0fc9e7201de6 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java +++ spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java @@ -101,19 +101,25 @@ private RemoteDriver(String[] args) throws Exception { SparkConf conf = new SparkConf(); String serverAddress = null; int serverPort = -1; + Map mapConf = Maps.newHashMap(); for (int idx = 0; idx < args.length; idx += 2) { String key = args[idx]; - if (key.equals("--remote-host")) { + if ("--remote-host".equals(key)) { serverAddress = getArg(args, idx); - } else if (key.equals("--remote-port")) { + } else if ("--remote-port".equals(key)) { serverPort = Integer.parseInt(getArg(args, idx)); - } else if (key.equals("--client-id")) { + } else if ("--client-id".equals(key)) { conf.set(SparkClientFactory.CONF_CLIENT_ID, getArg(args, idx)); - } else if (key.equals("--secret")) { + } else if ("--secret".equals(key)) { conf.set(SparkClientFactory.CONF_KEY_SECRET, getArg(args, idx)); - } else if (key.equals("--conf")) { + } else if ("--conf".equals(key)) { String[] val = getArg(args, idx).split("[=]", 2); conf.set(val[0], val[1]); + } else if ("--remote-driver-conf".equals(key)) { + String[] val = getArg(args, idx).split("[=]", 2); + //set these only in mapConf and not in SparkConf, + // as these are non-spark specific configs used by the remote driver + mapConf.put(val[0], val[1]); } else { throw new IllegalArgumentException("Invalid command line: " + Joiner.on(" ").join(args)); @@ -124,7 +130,6 @@ private RemoteDriver(String[] args) throws Exception { LOG.info("Connecting to: {}:{}", serverAddress, serverPort); - Map mapConf = Maps.newHashMap(); for (Tuple2 e : conf.getAll()) { mapConf.put(e._1(), e._2()); LOG.debug("Remote Driver configured with: " + e._1() + "=" + e._2()); diff --git spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index e4f72a35312e11079125cd40b8bfbe7e8947b4f2..48456e8c6e94dd18fd316e741a3a1ea654c8d680 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -435,11 +435,12 @@ private Thread startDriver(final RpcServer rpcServer, final String clientId, fin argv.add("--remote-port"); argv.add(serverPort); - //hive.spark.* keys are passed down to the RemoteDriver via --conf, + //hive.spark.* keys are passed down to the RemoteDriver via --remote-driver-conf + // so that they are not used in sparkContext but only in remote driver, //as --properties-file contains the spark.* keys that are meant for SparkConf object. for (String hiveSparkConfKey : RpcConfiguration.HIVE_SPARK_RSC_CONFIGS) { String value = RpcConfiguration.getValue(hiveConf, hiveSparkConfKey); - argv.add("--conf"); + argv.add("--remote-driver-conf"); argv.add(String.format("%s=%s", hiveSparkConfKey, value)); }