commit 06c51f3a37c0f566b2ff5a9ccf0f37f2e8989019
Author: Bharath Krishna
Date:   Tue Sep 18 23:55:52 2018 -0700

    HIVE-20545 : Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications

diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 30ea7f81292b0db54f4eb82468191fda38f9a0d4..b553afad307b87ee96b67d7d6f0a711a8462a675 100644
--- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -512,6 +512,10 @@ public static ConfVars getMetaConf(String name) {
         "hive.metastore.event.message.factory",
         "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory",
         "Factory class for making encoding and decoding messages in the events generated."),
+    EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS(
+        "metastore.notification.parameters.exclude.patterns", "hive.metastore.notification.parameters.exclude.patterns",
+        "", "List of comma-separated regexes to match the parameters to be excluded"
+        + " from Table and Partition parameters in HMS notifications."),
     EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive",
         "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS,
         "time after which events will be removed from the database listener queue"),
@@ -1405,6 +1409,21 @@ public static boolean getBoolVar(Configuration conf, ConfVars var) {
     return val == null ? conf.getBoolean(var.hiveName, (Boolean)var.defaultVal) : Boolean.valueOf(val);
   }
 
+  public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) {
+    assert var.defaultVal.getClass() == String.class;
+    String[] result = conf.getTrimmedStrings(var.varname, (String[]) null);
+    if (result != null) {
+      return result;
+    }
+    if (var.hiveName != null) {
+      result = conf.getTrimmedStrings(var.hiveName, (String[]) null);
+      if (result != null) {
+        return result;
+      }
+    }
+    return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal());
+  }
+
   /**
    * Set the variable as a boolean
    * @param conf configuration file to set it in
diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
index 2668b053205f48226da442ce65fcc2d7f6e76763..8ce45a3f9b00133e23c59c199efc2445e4aa33e0 100644
--- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
+++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java
@@ -19,10 +19,12 @@
 
 package org.apache.hadoop.hive.metastore.messaging.json;
 
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 import javax.annotation.Nullable;
 
@@ -37,6 +39,7 @@
 import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.TxnToWriteId;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.metastore.events.AcidWriteEvent;
 import org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage;
 import org.apache.hadoop.hive.metastore.messaging.AddForeignKeyMessage;
@@ -93,6 +96,17 @@
 
   private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer();
 
+  private static String[] excludePatterns =
+      MetastoreConf.getTrimmedStringsVar(conf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS);
+  private static List<Pattern> excludeParamsRegexes = new ArrayList<>();
+
+  static {
+    for (String pattern : excludePatterns) {
+      Pattern p = Pattern.compile(pattern);
+      excludeParamsRegexes.add(p);
+    }
+  }
+
   @Override
   public MessageDeserializer getDeserializer() {
     return deserializer;
@@ -264,6 +278,22 @@ private long now() {
     }));
   }
 
+  private static boolean matchesAnyPattern(String input) {
+    for (Pattern rx : excludeParamsRegexes) {
+      if (rx.matcher(input).matches()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private static void filterParameters(Map<String, String> parameters) {
+    if(parameters == null || parameters.isEmpty()) {
+      return;
+    }
+    parameters.entrySet().removeIf(entry -> matchesAnyPattern(entry.getKey()));
+  }
+
   static String createPrimaryKeyObjJson(SQLPrimaryKey primaryKeyObj) throws TException {
     TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
     return serializer.toString(primaryKeyObj, "UTF-8");
@@ -295,11 +325,17 @@ static String createCatalogObjJson(Catalog catObj) throws TException {
   }
 
   static String createTableObjJson(Table tableObj) throws TException {
+    if (tableObj.isSetParameters()) {
+      filterParameters(tableObj.getParameters());
+    }
     TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
     return serializer.toString(tableObj, "UTF-8");
   }
 
   static String createPartitionObjJson(Partition partitionObj) throws TException {
+    if (partitionObj.isSetParameters()) {
+      filterParameters(partitionObj.getParameters());
+    }
     TSerializer serializer = new TSerializer(new TJSONProtocol.Factory());
     return serializer.toString(partitionObj, "UTF-8");
   }