diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java index 1798622..ad93480 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HiveSparkClientFactory.java @@ -52,6 +52,7 @@ private static final String SPARK_DEFAULT_APP_NAME = "Hive on Spark"; private static final String SPARK_DEFAULT_SERIALIZER = "org.apache.spark.serializer.KryoSerializer"; private static final String SPARK_DEFAULT_REFERENCE_TRACKING = "false"; + private static final String SPARK_YARN_REPORT_INTERVAL = "spark.yarn.report.interval"; public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf) throws Exception { Map sparkConf = initiateSparkConf(hiveconf); @@ -183,6 +184,14 @@ public static HiveSparkClient createHiveSparkClient(HiveConf hiveconf) throws Ex } } + //The application reports tend to spam the hive logs. This is controlled by spark, and the default seems to be 1s. + //If it is not specified, set it to a much higher number. It can always be overridden by the user. + String sparkYarnReportInterval = sparkConf.get(SPARK_YARN_REPORT_INTERVAL); + if (sparkMaster.startsWith("yarn") && sparkYarnReportInterval == null) { + //the new version of spark also takes time-units, but old versions do not. + sparkConf.put(SPARK_YARN_REPORT_INTERVAL, "60000"); + } + return sparkConf; }