commit 9222c95c723ab5d9d7af45016f6e953095590aba
Author: Sahil Takiar
Date:   Mon Jun 18 15:38:13 2018 -0500

    HIVE-19937: Intern JobConf objects in Spark tasks

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
index 16d7c519fa..dbff8231ce 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.StringInternUtils;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.CompilationOpContext;
@@ -442,7 +443,8 @@ public void setChildren(Configuration hconf) throws Exception {
       }
       Map<Operator<? extends OperatorDesc>, MapOpCtx> contexts = opCtxMap.get(onefile.toString());
       if (contexts == null) {
-        opCtxMap.put(onefile.toString(), contexts = new LinkedHashMap<Operator<? extends OperatorDesc>, MapOpCtx>());
+        opCtxMap.put(StringInternUtils.internIfNotNull(onefile.toString()),
+            contexts = new LinkedHashMap<Operator<? extends OperatorDesc>, MapOpCtx>());
       }
       if (contexts.containsKey(op)) {
         continue;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
index e03429bc37..100a8f2cd9 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
@@ -52,6 +52,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.SparkEdgeProperty;
 import org.apache.hadoop.hive.ql.plan.SparkWork;
@@ -72,6 +73,7 @@
 import com.esotericsoftware.kryo.io.Output;
 import
com.esotericsoftware.kryo.pool.KryoFactory;
 import com.esotericsoftware.kryo.pool.KryoPool;
+import com.esotericsoftware.kryo.serializers.BeanSerializer;
 import com.esotericsoftware.kryo.serializers.FieldSerializer;

 /**
@@ -239,7 +241,8 @@ public Kryo create() {
           new StdInstantiatorStrategy());
       removeField(kryo, AbstractOperatorDesc.class, "colExprMap");
       removeField(kryo, AbstractOperatorDesc.class, "statistics");
-      kryo.register(MapWork.class);
+      kryo.register(MapWork.class, new BeanSerializer(kryo, MapWork.class));
+      kryo.register(PartitionDesc.class, new BeanSerializer(kryo, PartitionDesc.class));
       kryo.register(ReduceWork.class);
       kryo.register(TableDesc.class);
       kryo.register(UnionOperator.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
index 5bf0625c09..038b6662a2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
@@ -194,6 +194,7 @@ public void setPathToAliases(final LinkedHashMap<Path, ArrayList<String>> pathTo
   }

   public void addPathToAlias(Path path, ArrayList<String> aliases){
+    StringInternUtils.internUriStringsInPath(path);
     pathToAliases.put(path, aliases);
   }

@@ -201,6 +202,7 @@ public void addPathToAlias(Path path, String newAlias){
     ArrayList<String> aliases = pathToAliases.get(path);
     if (aliases == null) {
       aliases = new ArrayList<>();
+      StringInternUtils.internUriStringsInPath(path);
       pathToAliases.put(path, aliases);
     }
     aliases.add(newAlias.intern());
@@ -243,6 +245,9 @@ public void removePathToAlias(Path path){
   }

   public void setPathToPartitionInfo(final LinkedHashMap<Path, PartitionDesc> pathToPartitionInfo) {
+    for (Path p : pathToPartitionInfo.keySet()) {
+      StringInternUtils.internUriStringsInPath(p);
+    }
     this.pathToPartitionInfo = pathToPartitionInfo;
   }

@@ -693,6 +698,10 @@ public void setEventSourceColumnNameMap(Map<String, List<String>> map) {
     return eventSourceColumnTypeMap;
   }

+  public Map<String, List<String>> setEventSourceColumnTypeMap() {
+    return eventSourceColumnTypeMap;
+  }
+
 public Map<String, List<ExprNodeDesc>> getEventSourcePartKeyExprMap() {
     return eventSourcePartKeyExprMap;
   }
@@ -739,7 +748,11 @@ public BitSet getIncludedBuckets() {

   public void setIncludedBuckets(BitSet includedBuckets) {
     // see comment next to the field
-    this.includedBuckets = includedBuckets.toByteArray();
+    if (includedBuckets != null) {
+      this.includedBuckets = includedBuckets.toByteArray();
+    } else {
+      this.includedBuckets = null;
+    }
   }

   public void setVectorizedRowBatch(VectorizedRowBatch vectorizedRowBatch) {
@@ -824,7 +837,12 @@ public void setVectorPartitionDescList(List<VectorPartitionDesc> vectorPartition
   }

   public void setVectorizationEnabledConditionsMet(ArrayList<String> vectorizationEnabledConditionsMet) {
-    this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsMet, true);
+    if (vectorizationEnabledConditionsMet != null) {
+      this.vectorizationEnabledConditionsMet = VectorizationCondition.addBooleans(
+          vectorizationEnabledConditionsMet, true);
+    } else {
+      this.vectorizationEnabledConditionsMet = null;
+    }
   }

   public List<String> getVectorizationEnabledConditionsMet() {
@@ -832,7 +850,11 @@ public void setVectorizationEnabledConditionsMet(ArrayList<String> vectorization
   }

   public void setVectorizationEnabledConditionsNotMet(List<String> vectorizationEnabledConditionsNotMet) {
-    this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false);
+    if (vectorizationEnabledConditionsNotMet != null) {
+      this.vectorizationEnabledConditionsNotMet = VectorizationCondition.addBooleans(vectorizationEnabledConditionsNotMet, false);
+    } else {
+      this.vectorizationEnabledConditionsNotMet = null;
+    }
   }

   public List<String> getVectorizationEnabledConditionsNotMet() {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
index 61458b4e25..fed37269f3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/plan/PartitionDesc.java
@@ -69,7 +69,11 @@
   private VectorPartitionDesc vectorPartitionDesc;

   public void setBaseFileName(String baseFileName) {
-    this.baseFileName = baseFileName.intern();
+    if (baseFileName != null) {
+      this.baseFileName = baseFileName.intern();
+    } else {
+      this.baseFileName = null;
+    }
  }

  public PartitionDesc() {