commit dbc79b985654470291143fde8a045375cb2268ac Author: Misha Dmitriev Date: Wed Aug 2 18:03:00 2017 -0700 HIVE-17237: HMS wastes 26.4% of memory due to dup strings in metastore.api.Partition.parameters diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index aac3a535879d6cc8ff9186ecf29569aa81f59768..dc3ee98334fb4529ee1edc0b1e587ba1d87534d1 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -87,18 +87,11 @@ }).with( new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); - /** - * Maintain a String pool to reduce memory. - */ - private static final Interner STRING_INTERNER; - static { NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.ENGLISH); decimalFormat = (DecimalFormat) numberFormat; decimalFormat.applyPattern("#.##"); - - STRING_INTERNER = Interners.newWeakInterner(); - } +} /** * Return the internalized string, or null if the given string is null. 
@@ -109,7 +102,7 @@ public static String intern(String str) { if(str == null) { return null; } - return STRING_INTERNER.intern(str); + return str.intern(); } /** diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java index a24722151a0a88e0609ac305c71890defd1f29ce..1dcd4e8f4234cb51e07b668a0d99217560e32cdc 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java @@ -402,7 +402,7 @@ public void putToParameters(String key, String val) { if (this.parameters == null) { this.parameters = new HashMap(); } - this.parameters.put(key, val); + this.parameters.put(org.apache.hive.common.util.HiveStringUtils.intern(key), org.apache.hive.common.util.HiveStringUtils.intern(val)); } public Map getParameters() { @@ -1002,7 +1002,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Partition struct) t } iprot.readMapEnd(); } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -1215,7 +1215,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Partition struct) th struct.parameters.put(_key223, _val224); } } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } if (incoming.get(7)) { struct.privileges = new PrincipalPrivilegeSet(); diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java index 
d5a8d999c3227ad35e4a163a3bbccd1a6a34e17c..e2a1009b19640b6d9ae69b8e8148012f13117b55 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java @@ -227,7 +227,7 @@ public void putToParameters(String key, String val) { if (this.parameters == null) { this.parameters = new HashMap(); } - this.parameters.put(key, val); + this.parameters.put(org.apache.hive.common.util.HiveStringUtils.intern(key), org.apache.hive.common.util.HiveStringUtils.intern(val)); } public Map getParameters() { @@ -534,7 +534,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SerDeInfo struct) t } iprot.readMapEnd(); } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -646,7 +646,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, SerDeInfo struct) th struct.parameters.put(_key111, _val112); } } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } } } diff --git a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java index 938f06bbce7a2b213e901f153e1da4606339c0cf..a4eca8d5728821b6c35a425fa87caaf2da3e37e6 100644 --- a/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java +++ b/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java @@ -561,7 +561,7 @@ public void putToParameters(String key, String val) { if (this.parameters == null) { this.parameters = new HashMap(); } - this.parameters.put(key, val); + 
this.parameters.put(org.apache.hive.common.util.HiveStringUtils.intern(key), org.apache.hive.common.util.HiveStringUtils.intern(val)); } public Map getParameters() { @@ -1407,7 +1407,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StorageDescriptor s } iprot.readMapEnd(); } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -1730,7 +1730,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, StorageDescriptor st struct.parameters.put(_key187, _val188); } } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } if (incoming.get(10)) { struct.skewedInfo = new SkewedInfo(); diff --git a/metastore/src/main/resources/thrift-replacements.txt b/metastore/src/main/resources/thrift-replacements.txt index 528ed08cc074cea22feb918d3d774114bcbb0705..4b33243e3a5686fae34f793c9965a8e87902803b 100644 --- a/metastore/src/main/resources/thrift-replacements.txt +++ b/metastore/src/main/resources/thrift-replacements.txt @@ -58,3 +58,11 @@ this\.tableName\ \=\ other\.tableName;=this.tableName\ \=\ org.apache.hive.commo __this__parameters_copy_key\ \=\ other_element_key;=__this__parameters_copy_key\ \=\ org.apache.hive.common.util.HiveStringUtils.intern(other_element_key); __this__parameters_copy_value\ \=\ other_element_value;=__this__parameters_copy_value\ \=\ org.apache.hive.common.util.HiveStringUtils.intern(other_element_value); __this_values\.add(other_element);=__this_values.add(org.apache.hive.common.util.HiveStringUtils.intern(other_element)); + +# Fix methods in Partition.java that call Map.put(String key, String value) + +this\.parameters\.put\(key,\ val\);=this.parameters.put(org.apache.hive.common.util.HiveStringUtils.intern(key),\ 
org.apache.hive.common.util.HiveStringUtils.intern(val)); + +# Fix the deserialization methods in Partition.java (also matches SerDeInfo.java and StorageDescriptor.java): intern parameters after they are deserialized + +struct\.setParametersIsSet\(true\);=struct.parameters\ \=\ org.apache.hive.common.util.HiveStringUtils.intern(struct.parameters);\ struct.setParametersIsSet(true); \ No newline at end of file