commit a96d1ee10dbaa3c618c49f962debf8d5a6f632b9
Author: Sahil Takiar
Date:   Mon Jun 18 15:38:13 2018 -0500

    HIVE-19937: Intern JobConf objects in Spark tasks

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java
index 7b019b9c4d..be4d262833 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/HivePairFlatMapFunction.java
@@ -20,6 +20,7 @@
 import java.text.NumberFormat;
 
+import org.apache.hadoop.hive.common.StringInternUtils;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.spark.TaskContext;
 import org.apache.spark.api.java.function.PairFlatMapFunction;
@@ -48,6 +49,10 @@ public HivePairFlatMapFunction(byte[] buffer, SparkReporter sparkReporter) {
   protected void initJobConf() {
     if (jobConf == null) {
       jobConf = KryoSerializer.deserializeJobConf(this.buffer);
+      jobConf.forEach(entry -> {
+        StringInternUtils.internIfNotNull(entry.getKey());
+        StringInternUtils.internIfNotNull(entry.getValue());
+      });
       SmallTableCache.initialize(jobConf);
       setupMRLegacyConfigs();
     }
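
Note (not part of the commit): a minimal sketch of what interning buys here. Each Spark task deserializes its own JobConf from the serialized buffer, so equal configuration keys and values would otherwise exist as separate String copies per task. String.intern(), which StringInternUtils.internIfNotNull is assumed to wrap with a null check, maps equal strings to one canonical instance in the JVM string pool. The class name and property string below are illustrative only.

// Illustrative sketch, assuming the behavior described above: intern()
// deduplicates equal String copies such as those produced when several
// tasks each deserialize a JobConf from the same serialized bytes.
public class InternSketch {
  public static void main(String[] args) {
    // Two equal but distinct String objects, as two independently
    // deserialized JobConfs would hold for the same configuration value.
    String v1 = new String("hive.exec.scratchdir");
    String v2 = new String("hive.exec.scratchdir");

    System.out.println(v1 == v2);                   // false: separate copies
    System.out.println(v1.intern() == v2.intern()); // true: one pooled instance

    // The patch applies this idea to every key and value exposed by the
    // JobConf's forEach(Map.Entry<String, String>) iteration.
  }
}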