diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8a906ce7ce..28a10d4409 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3437,6 +3437,9 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal
         "If a Spark job contains more tasks than the maximum, it will be cancelled. A value of -1 means no limit."),
     SPARK_STAGE_MAX_TASKS("hive.spark.stage.max.tasks", -1, "The maximum number of tasks a stage in a Spark job may have.\n" +
         "If a Spark job stage contains more tasks than the maximum, the job will be cancelled. A value of -1 means no limit."),
+    SPARK_DYNAMIC_RDD_CACHING_OPTIMIZATION("hive.spark.dynamic.rdd.caching.optimization", true, "Whether or not " +
+        "Hive should dynamically cache intermediate Spark RDD objects.\nHive will cache parts of the physical plan " +
+        "that have multiple children. RDDs will be persisted with storage level = MEMORY_AND_DISK"),
     NWAYJOINREORDER("hive.reorder.nway.joins", true,
       "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"),
     HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
index 079ec424f4..74b73e13c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SparkPlanGenerator.java
@@ -254,6 +254,10 @@ private SparkTran generate(BaseWork work, SparkWork sparkWork) throws Exception
   }
 
   private boolean isCachingWork(BaseWork work, SparkWork sparkWork) {
+    // Fast exit: dynamic RDD caching can be disabled globally via config.
+    if (!HiveConf.getBoolVar(jobConf, HiveConf.ConfVars.SPARK_DYNAMIC_RDD_CACHING_OPTIMIZATION)) {
+      return false;
+    }
     boolean caching = true;
     List children = sparkWork.getChildren(work);
     if (children.size() < 2) {