commit 0ce616985f1a3b8c74c197df15ff98d3dc33eff3 Author: Bharath Krishna Date: Tue Apr 24 16:22:10 2018 -0700 HIVE-18958 : Fix Spark config warnings - Adding --remote-driver-conf to differentiate RemoteDriver specific conf from SparkConf. - Also, removing the code that adds HIVE_SPARK_RSC_CONFIGS to SparkConf as this is done in SparkClientImpl, hence avoiding same thing done twice. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index c22fb8923d90d135cf5e55221c6345643ef4a26a..9c36154a7bb7e817776538153e3056672a131060 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -63,7 +63,7 @@ @VisibleForTesting public static final String SPARK_CLONE_CONFIGURATION = "spark.hadoop.cloneConf"; - public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String sessionId) throws Exception { + public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String sessionId) throws Exception { Map sparkConf = initiateSparkConf(hiveconf, sessionId); // Submit spark job through local spark context while spark master is local mode, otherwise submit @@ -199,13 +199,6 @@ public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf, String se "Pass Oozie configuration (%s -> %s).", propertyName, LogUtils.maskIfPassword(propertyName,value))); } - if (RpcConfiguration.HIVE_SPARK_RSC_CONFIGS.contains(propertyName)) { - String value = RpcConfiguration.getValue(hiveConf, propertyName); - sparkConf.put(propertyName, value); - LOG.debug(String.format( - "load RPC property from hive configuration (%s -> %s).", - propertyName, LogUtils.maskIfPassword(propertyName,value))); - } } final boolean optShuffleSerDe = hiveConf.getBoolVar( diff --git spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java index 255c30583a9549cf07f53ad4f520fe29dde38436..47c4add38096ed6506f7da6e1fad0fc9e7201de6 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java +++ spark-client/src/main/java/org/apache/hive/spark/client/RemoteDriver.java @@ -101,19 +101,25 @@ private RemoteDriver(String[] args) throws Exception { SparkConf conf = new SparkConf(); String serverAddress = null; int serverPort = -1; + Map mapConf = Maps.newHashMap(); for (int idx = 0; idx < args.length; idx += 2) { String key = args[idx]; - if (key.equals("--remote-host")) { + if ("--remote-host".equals(key)) { serverAddress = getArg(args, idx); - } else if (key.equals("--remote-port")) { + } else if ("--remote-port".equals(key)) { serverPort = Integer.parseInt(getArg(args, idx)); - } else if (key.equals("--client-id")) { + } else if ("--client-id".equals(key)) { conf.set(SparkClientFactory.CONF_CLIENT_ID, getArg(args, idx)); - } else if (key.equals("--secret")) { + } else if ("--secret".equals(key)) { conf.set(SparkClientFactory.CONF_KEY_SECRET, getArg(args, idx)); - } else if (key.equals("--conf")) { + } else if ("--conf".equals(key)) { String[] val = getArg(args, idx).split("[=]", 2); conf.set(val[0], val[1]); + } else if ("--remote-driver-conf".equals(key)) { + String[] val = getArg(args, idx).split("[=]", 2); + //set these only in mapConf and not in SparkConf, + // as these are non-spark specific configs used by the remote driver + mapConf.put(val[0], val[1]); } else { throw new IllegalArgumentException("Invalid command line: " + Joiner.on(" ").join(args)); @@ -124,7 +130,6 @@ private RemoteDriver(String[] args) throws Exception { LOG.info("Connecting to: {}:{}", serverAddress, serverPort); - Map mapConf = Maps.newHashMap(); for (Tuple2 e : conf.getAll()) { mapConf.put(e._1(), e._2()); LOG.debug("Remote Driver configured with: " + e._1() + "=" + e._2()); diff --git spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java index e4f72a35312e11079125cd40b8bfbe7e8947b4f2..48456e8c6e94dd18fd316e741a3a1ea654c8d680 100644 --- spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java +++ spark-client/src/main/java/org/apache/hive/spark/client/SparkClientImpl.java @@ -435,11 +435,12 @@ private Thread startDriver(final RpcServer rpcServer, final String clientId, fin argv.add("--remote-port"); argv.add(serverPort); - //hive.spark.* keys are passed down to the RemoteDriver via --conf, + //hive.spark.* keys are passed down to the RemoteDriver via --remote-driver-conf + // so that they are not used in sparkContext but only in remote driver, //as --properties-file contains the spark.* keys that are meant for SparkConf object. for (String hiveSparkConfKey : RpcConfiguration.HIVE_SPARK_RSC_CONFIGS) { String value = RpcConfiguration.getValue(hiveConf, hiveSparkConfKey); - argv.add("--conf"); + argv.add("--remote-driver-conf"); argv.add(String.format("%s=%s", hiveSparkConfKey, value)); }