commit d1767ac4d96dfc5b7cee39e8bc2f9f3c7db13935 Author: Bharath Krishna Date: Thu Sep 20 21:51:35 2018 -0700 HIVE-20545: Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 46a6d532b6..baccc2449d 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -489,6 +489,10 @@ public static ConfVars getMetaConf(String name) { "hive.metastore.event.message.factory", "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory", "Factory class for making encoding and decoding messages in the events generated."), + EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS("metastore.notification.parameters.exclude.patterns", + "hive.metastore.notification.parameters.exclude.patterns", "", + "List of comma-separated regexes to match the parameters to be excluded" + + " from Table and Partition parameters in HMS notifications."), EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive", "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS, "time after which events will be removed from the database listener queue"), @@ -1369,6 +1373,21 @@ public static boolean getBoolVar(Configuration conf, ConfVars var) { return val == null ? conf.getBoolean(var.hiveName, (Boolean)var.defaultVal) : Boolean.valueOf(val); } + public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == String.class; + String[] result = conf.getTrimmedStrings(var.varname, (String[]) null); + if (result != null) { + return result; + } + if (var.hiveName != null) { + result = conf.getTrimmedStrings(var.hiveName, (String[]) null); + if (result != null) { + return result; + } + } + return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal()); + } + /** * Set the variable as a boolean * @param conf configuration file to set it in diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java index 07f51f0a8a..901522d9e1 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java @@ -19,10 +19,13 @@ package org.apache.hadoop.hive.metastore.messaging.json; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -39,6 +42,7 @@ import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.TxnToWriteId; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.events.AcidWriteEvent; import org.apache.hadoop.hive.metastore.messaging.AddForeignKeyMessage; import org.apache.hadoop.hive.metastore.messaging.AddNotNullConstraintMessage; @@ -84,6 +88,9 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; +import static java.util.regex.Pattern.compile; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapWithPredicates; + /** * The JSON implementation of the MessageFactory. Constructs JSON implementations of each * message-type. @@ -94,6 +101,12 @@ private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); + private static final List excludePatterns = Arrays.asList( + MetastoreConf.getTrimmedStringsVar(conf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS)); + + private static final List> paramsFilter = + excludePatterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + @Override public MessageDeserializer getDeserializer() { return deserializer; @@ -297,11 +310,17 @@ static String createCatalogObjJson(Catalog catObj) throws TException { } static String createTableObjJson(Table tableObj) throws TException { + //Note: The parameters of the Table object will be removed in the filter if it matches + // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS + filterMapWithPredicates(tableObj.getParameters(), paramsFilter); TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); return serializer.toString(tableObj, "UTF-8"); } static String createPartitionObjJson(Partition partitionObj) throws TException { + //Note: The parameters of the Partition object will be removed in the filter if it matches + // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS + filterMapWithPredicates(partitionObj.getParameters(), paramsFilter); TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); return serializer.toString(partitionObj, "UTF-8"); } diff --git standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 4bc819fc2b..1c187e0b9e 100644 --- standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -104,6 +104,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -1832,4 +1833,32 @@ public static String getDefaultCatalog(Configuration conf) { return catName; } + /** + * filters a given map with predicate provided. All entries of map whose key matches with + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicate Predicate with pattern to filter the map + */ + public static void filterMapWithPredicate(Map map, Predicate predicate) { + if (map == null) { + return; + } + map.entrySet().removeIf(entry -> predicate.test(entry.getKey())); + } + + /** + * filters a given map with list of predicates. All entries of map whose key matches with any + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicates List of predicates with patterns to filter the map + */ + public static void filterMapWithPredicates(Map map, List> predicates) { + if (map == null) { + return; + } + filterMapWithPredicate(map, predicates.stream().reduce(Predicate::or).orElse(x -> false)); + } + } diff --git standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java index 55ff1502d4..3a5d26b60d 100644 --- standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java +++ standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/utils/TestMetaStoreUtils.java @@ -40,14 +40,20 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; +import static java.util.regex.Pattern.compile; import static org.apache.hadoop.hive.common.StatsSetupConst.COLUMN_STATS_ACCURATE; import static org.apache.hadoop.hive.common.StatsSetupConst.NUM_FILES; import static org.apache.hadoop.hive.common.StatsSetupConst.STATS_GENERATED; import static org.apache.hadoop.hive.common.StatsSetupConst.TOTAL_SIZE; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapWithPredicates; import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.updateTableStatsSlow; import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertThat; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; @@ -270,5 +276,48 @@ public void testUpdateTableStatsSlow_doesNotUpdateStats() throws TException { updateTableStatsSlow(db, tbl2, wh, false, false, null); verify(wh, never()).getFileStatusesForUnpartitionedTable(db, tbl2); } + + @Test + public void TestFilterMapWithPredicates() { + Map testMap = getTestParamMap(); + + List excludePatterns = Arrays.asList("lastDdl", "num"); + testMapFilter(testMap, excludePatterns); + assertFalse(testMap.containsKey("transient_lastDdlTime")); + assertFalse(testMap.containsKey("numFiles")); + assertFalse(testMap.containsKey("numFilesErasureCoded")); + assertFalse(testMap.containsKey("numRows")); + assertEquals(testMap.size(), 6); + + testMap = getTestParamMap(); + excludePatterns = Arrays.asList("^bucket", "ACCURATE$"); + testMapFilter(testMap, excludePatterns); + assertTrue(testMap.containsKey("testBucketing_version")); + assertFalse(testMap.containsKey("bucketing_version")); + assertTrue(testMap.containsKey("COLUMN_STATS_ACCURATED")); + assertFalse(testMap.containsKey("COLUMN_STATS_ACCURATE")); + assertEquals(testMap.size(), 8); + } + + private void testMapFilter(Map testMap, List patterns) { + List> paramsFilter = + patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + filterMapWithPredicates(testMap, paramsFilter); + } + + private Map getTestParamMap() { + return new HashMap(){{ + put("totalSize", "1024"); + put("numRows", "10"); + put("rawDataSize", "3243234"); + put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\""); + put("COLUMN_STATS_ACCURATED", "dummy"); + put("numFiles", "2"); + put("transient_lastDdlTime", "1537487124"); + put("bucketing_version", "2"); + put("testBucketing_version", "2"); + put("numFilesErasureCoded", "0"); + }}; + } }