diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index b36c60e..b1df1d6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -53,7 +53,7 @@ private static final String SPARK_DEFAULT_APP_NAME = "Hive on Spark"; private static final String SPARK_DEFAULT_SERIALIZER = "org.apache.spark.serializer.KryoSerializer"; private static final String SPARK_DEFAULT_REFERENCE_TRACKING = "false"; - private static final String SPARK_YARN_REPORT_INTERVAL = "spark.yarn.report.interval"; + private static final String SPARK_WAIT_APP_COMPLETE = "spark.yarn.submit.waitAppCompletion"; public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf) throws Exception { Map sparkConf = initiateSparkConf(hiveconf); @@ -188,12 +188,9 @@ public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf) throws Ex } } - //The application reports tend to spam the hive logs. This is controlled by spark, and the default seems to be 1s. - //If it is not specified, set it to a much higher number. It can always be overriden by user. - String sparkYarnReportInterval = sparkConf.get(SPARK_YARN_REPORT_INTERVAL); - if (sparkMaster.startsWith("yarn") && sparkYarnReportInterval == null) { - //the new version of spark also takes time-units, but old versions do not. - sparkConf.put(SPARK_YARN_REPORT_INTERVAL, "60000"); + // Disable it to avoid verbose app state report in yarn-cluster mode + if (sparkMaster.equals("yarn-cluster") && sparkConf.get(SPARK_WAIT_APP_COMPLETE) == null) { + sparkConf.put(SPARK_WAIT_APP_COMPLETE, "false"); } return sparkConf;