commit 4c15d86cea27c9fe8690449db78457691cdfed2a Author: Bharath Krishna Date: Wed Sep 12 23:35:59 2018 -0700 HIVE-20545: Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 30ea7f81292b0db54f4eb82468191fda38f9a0d4..4b001d0243e20dc9a119dbbe43783be6bb8ef0bf 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -512,6 +512,12 @@ public static ConfVars getMetaConf(String name) { "hive.metastore.event.message.factory", "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory", "Factory class for making encoding and decoding messages in the events generated."), + EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS("metastore.notification.parameters.exclude.patterns", + "hive.metastore.notification.parameters.exclude.patterns", "", + "List of comma-separated regexes that are used to reduced the size of HMS Notification messages." + + " The regexes are matched against each key of parameters map in Table or Partition object" + + "present in HMS Notification. Any key-value pair whose key is matched with any regex will" + +" be removed from Parameters map during Serialization of Table/Partition object."), EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive", "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS, "time after which events will be removed from the database listener queue"), @@ -1405,6 +1411,28 @@ public static boolean getBoolVar(Configuration conf, ConfVars var) { return val == null ? conf.getBoolean(var.hiveName, (Boolean)var.defaultVal) : Boolean.valueOf(val); } + /** + * Get values from comma-separated config, to an array after extracting individual values. + * @param conf Configuration to retrieve it from + * @param var variable to retrieve + * @return Array of String, containing each value from the comma-separated config, + * or default value if value not in config file + */ + public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == String.class; + String[] result = conf.getTrimmedStrings(var.varname, (String[]) null); + if (result != null) { + return result; + } + if (var.hiveName != null) { + result = conf.getTrimmedStrings(var.hiveName, (String[]) null); + if (result != null) { + return result; + } + } + return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal()); + } + /** * Set the variable as a boolean * @param conf configuration file to set it in diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index c681a87a1c6b10a4f9494e49a42282cf90027ad7..ffa356b6d530ee9f24e642274025c72648ac2ac6 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -56,6 +56,7 @@ import java.util.Map; import java.util.Properties; import java.util.TimeZone; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -905,4 +906,32 @@ public static boolean isView(Table table) { } return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType()); } + + /** + * filters a given map with predicate provided. All entries of map whose key matches with + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicate Predicate with pattern to filter the map + */ + public static void filterMapKeys(Map map, Predicate predicate) { + if (map == null) { + return; + } + map.entrySet().removeIf(entry -> predicate.test(entry.getKey())); + } + + /** + * filters a given map with list of predicates. All entries of map whose key matches with any + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicates List of predicates with patterns to filter the map + */ + public static void filterMapkeys(Map map, List> predicates) { + if (map == null) { + return; + } + filterMapKeys(map, predicates.stream().reduce(Predicate::or).orElse(x -> false)); + } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java index 2668b053205f48226da442ce65fcc2d7f6e76763..5dacae1d04c4c8ce6740a3f7d8b18f9a635d55c7 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java @@ -19,10 +19,13 @@ package org.apache.hadoop.hive.metastore.messaging.json; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -37,6 +40,7 @@ import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.TxnToWriteId; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.events.AcidWriteEvent; import org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage; import org.apache.hadoop.hive.metastore.messaging.AddForeignKeyMessage; @@ -83,6 +87,9 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; +import static java.util.regex.Pattern.compile; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapkeys; + /** * The JSON implementation of the MessageFactory. Constructs JSON implementations of each * message-type. @@ -93,6 +100,12 @@ private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); + private static final List excludePatterns = Arrays.asList( + MetastoreConf.getTrimmedStringsVar(conf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS)); + + private static final List> paramsFilter = + excludePatterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + @Override public MessageDeserializer getDeserializer() { return deserializer; @@ -295,11 +308,17 @@ static String createCatalogObjJson(Catalog catObj) throws TException { } static String createTableObjJson(Table tableObj) throws TException { + //Note: The parameters of the Table object will be removed in the filter if it matches + // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS + filterMapkeys(tableObj.getParameters(), paramsFilter); TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); return serializer.toString(tableObj, "UTF-8"); } static String createPartitionObjJson(Partition partitionObj) throws TException { + //Note: The parameters of the Partition object will be removed in the filter if it matches + // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS + filterMapkeys(partitionObj.getParameters(), paramsFilter); TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); return serializer.toString(partitionObj, "UTF-8"); } diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java index 30de1c4cfa1cf019186b10583a06da0bf5491634..b05cb54e77132ab776bec89f46ba5e28d84c9a63 100644 --- standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreServerUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.metastore.utils; import com.google.common.collect.ImmutableMap; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; @@ -30,30 +31,31 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.Partition; -import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; -import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder; import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.thrift.TException; import org.junit.Assert; import org.junit.Test; import org.junit.experimental.categories.Category; -import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import static java.util.regex.Pattern.compile; import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE; import static org.apache.hadoop.hive.common.StatsSetupConst.FAST_STATS; import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES; import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_ERASURE_CODED_FILES; import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED; import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapkeys; import static org.hamcrest.core.Is.is; -import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; @@ -379,5 +381,84 @@ private static FileStatus getFileStatus(long fileLength, boolean isdir, int bloc new Path(pathString), false, false, isErasureCoded); } + @Test + public void testFilterMapWithPredicates() { + Map testMap = getTestParamMap(); + + List excludePatterns = Arrays.asList("lastDdl", "num"); + testMapFilter(testMap, excludePatterns); + assertFalse(testMap.containsKey("transient_lastDdlTime")); + assertFalse(testMap.containsKey("numFiles")); + assertFalse(testMap.containsKey("numFilesErasureCoded")); + assertFalse(testMap.containsKey("numRows")); + + Map expectedMap = new HashMap() {{ + put("totalSize", "1024"); + put("rawDataSize", "3243234"); + put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\""); + put("COLUMN_STATS_ACCURATED", "dummy"); + put("bucketing_version", "2"); + put("testBucketing_version", "2"); + }}; + + assertThat(expectedMap, is(testMap)); + + testMap = getTestParamMap(); + excludePatterns = Arrays.asList("^bucket", "ACCURATE$"); + testMapFilter(testMap, excludePatterns); + + expectedMap = new HashMap() {{ + put("totalSize", "1024"); + put("numRows", "10"); + put("rawDataSize", "3243234"); + put("COLUMN_STATS_ACCURATED", "dummy"); + put("numFiles", "2"); + put("transient_lastDdlTime", "1537487124"); + put("testBucketing_version", "2"); + put("numFilesErasureCoded", "0"); + }}; + + assertThat(expectedMap, is(testMap)); + + // test that if the config is not set in MetastoreConf, it does not filter any parameter + Configuration testConf = MetastoreConf.newMetastoreConf(); + testMap = getTestParamMap(); + excludePatterns = Arrays.asList(MetastoreConf + .getTrimmedStringsVar(testConf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS)); + + testMapFilter(testMap, excludePatterns); + assertThat(getTestParamMap(), is(testMap)); + + + // test that if the config is set to empty String in MetastoreConf, it does not filter any parameter + testConf.setStrings(MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS.getVarname(), ""); + testMap = getTestParamMap(); + excludePatterns = Arrays.asList(MetastoreConf + .getTrimmedStringsVar(testConf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS)); + + testMapFilter(testMap, excludePatterns); + assertThat(getTestParamMap(), is(testMap)); + } + + private void testMapFilter(Map testMap, List patterns) { + List> paramsFilter = + patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + filterMapkeys(testMap, paramsFilter); + } + + private Map getTestParamMap() { + return new HashMap() {{ + put("totalSize", "1024"); + put("numRows", "10"); + put("rawDataSize", "3243234"); + put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\""); + put("COLUMN_STATS_ACCURATED", "dummy"); + put("numFiles", "2"); + put("transient_lastDdlTime", "1537487124"); + put("bucketing_version", "2"); + put("testBucketing_version", "2"); + put("numFilesErasureCoded", "0"); + }}; + } }