commit 24cd68bc395f9b8020430ecddd667c1d449ea033 Author: Misha Dmitriev Date: Wed Aug 2 18:03:00 2017 -0700 HIVE-17237: HMS wastes 26.4% of memory due to dup strings in metastore.api.Partition.parameters diff --git a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java index aac3a535879d6cc8ff9186ecf29569aa81f59768..dc3ee98334fb4529ee1edc0b1e587ba1d87534d1 100644 --- a/common/src/java/org/apache/hive/common/util/HiveStringUtils.java +++ b/common/src/java/org/apache/hive/common/util/HiveStringUtils.java @@ -87,18 +87,11 @@ }).with( new LookupTranslator(EntityArrays.JAVA_CTRL_CHARS_ESCAPE())); - /** - * Maintain a String pool to reduce memory. - */ - private static final Interner STRING_INTERNER; - static { NumberFormat numberFormat = NumberFormat.getNumberInstance(Locale.ENGLISH); decimalFormat = (DecimalFormat) numberFormat; decimalFormat.applyPattern("#.##"); - - STRING_INTERNER = Interners.newWeakInterner(); - } +} /** * Return the internalized string, or null if the given string is null. 
@@ -109,7 +102,7 @@ public static String intern(String str) { if(str == null) { return null; } - return STRING_INTERNER.intern(str); + return str.intern(); } /** diff --git a/metastore/src/gen/thrift/gen-py/__init__.py b/metastore/src/gen/thrift/gen-py/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java index 08e2c504149dc38628a1de40fddbaa1cda490fde..68769167fac4473624fac509fec40d29230d0eba 100644 --- a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java +++ b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Partition.java @@ -402,7 +402,7 @@ public void putToParameters(String key, String val) { if (this.parameters == null) { this.parameters = new HashMap(); } - this.parameters.put(key, val); + this.parameters.put(org.apache.hadoop.hive.metastore.utils.StringUtils.intern(key), org.apache.hadoop.hive.metastore.utils.StringUtils.intern(val)); } public Map getParameters() { @@ -1002,7 +1002,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Partition struct) t } iprot.readMapEnd(); } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -1215,7 +1215,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Partition struct) th struct.parameters.put(_key223, _val224); } } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } if (incoming.get(7)) { struct.privileges = 
new PrincipalPrivilegeSet(); diff --git a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java index 986e0a234f57c8d13ad88a86b70c7392d7997f3f..452063f598ba750fd5e0c02ffb9bfd2b40a9a4ca 100644 --- a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java +++ b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/SerDeInfo.java @@ -227,7 +227,7 @@ public void putToParameters(String key, String val) { if (this.parameters == null) { this.parameters = new HashMap(); } - this.parameters.put(key, val); + this.parameters.put(org.apache.hadoop.hive.metastore.utils.StringUtils.intern(key), org.apache.hadoop.hive.metastore.utils.StringUtils.intern(val)); } public Map getParameters() { @@ -534,7 +534,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, SerDeInfo struct) t } iprot.readMapEnd(); } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -646,7 +646,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, SerDeInfo struct) th struct.parameters.put(_key111, _val112); } } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } } } diff --git a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java index 8b40acae80668c3bb3d4c3f49a53ffc06b74232f..ad3c6276ef32190b4225f1161a8f6c9284c7220d 100644 --- 
a/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java +++ b/standalone-metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/StorageDescriptor.java @@ -561,7 +561,7 @@ public void putToParameters(String key, String val) { if (this.parameters == null) { this.parameters = new HashMap(); } - this.parameters.put(key, val); + this.parameters.put(org.apache.hadoop.hive.metastore.utils.StringUtils.intern(key), org.apache.hadoop.hive.metastore.utils.StringUtils.intern(val)); } public Map getParameters() { @@ -1407,7 +1407,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, StorageDescriptor s } iprot.readMapEnd(); } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } else { org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -1730,7 +1730,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, StorageDescriptor st struct.parameters.put(_key187, _val188); } } - struct.setParametersIsSet(true); + struct.parameters = org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters); struct.setParametersIsSet(true); } if (incoming.get(10)) { struct.skewedInfo = new SkewedInfo(); diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StringUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StringUtils.java index 6a025a11b79f9ddcec5cd00d75286d96a6bdba07..4449799dc96df869ce53e886ec576746134aea83 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StringUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StringUtils.java @@ -32,18 +32,16 @@ public class StringUtils { - private static final Interner STRING_INTERNER = Interners.newWeakInterner(); - /** * Return the internalized string, or null 
if the given string is null. * @param str The string to intern * @return The identical string cached in the string pool. */ public static String intern(String str) { - if(str == null) { + if (str == null) { return null; } - return STRING_INTERNER.intern(str); + return str.intern(); } /** diff --git a/standalone-metastore/src/main/resources/thrift-replacements.txt b/standalone-metastore/src/main/resources/thrift-replacements.txt index 72511318e4f8f470f8886852183d0564dfab9805..01ee71a5b10f30d0514ad4cf41c2c767c1f86e5a 100644 --- a/standalone-metastore/src/main/resources/thrift-replacements.txt +++ b/standalone-metastore/src/main/resources/thrift-replacements.txt @@ -59,3 +59,11 @@ this\.tableName\ \=\ other\.tableName;=this.tableName\ \=\ org.apache.hadoop.hiv __this__parameters_copy_key\ \=\ other_element_key;=__this__parameters_copy_key\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other_element_key); __this__parameters_copy_value\ \=\ other_element_value;=__this__parameters_copy_value\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other_element_value); __this_values\.add(other_element);=__this_values.add(org.apache.hadoop.hive.metastore.utils.StringUtils.intern(other_element)); + +# Fix methods in Partition.java that call Map.put(String key, String value) + +this\.parameters\.put\(key,\ val\);=this.parameters.put(org.apache.hadoop.hive.metastore.utils.StringUtils.intern(key),\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(val)); + +# Fix the deserialization methods in Partition.java: intern parameters after they are deserialized + +struct\.setParametersIsSet\(true\);=struct.parameters\ \=\ org.apache.hadoop.hive.metastore.utils.StringUtils.intern(struct.parameters);\ struct.setParametersIsSet(true);