commit 7ac20cb49940f2d75928ed2ae403438f5beb3023 Author: Bharath Krishna Date: Wed Sep 12 23:35:59 2018 -0700 HIVE-20545: Exclude large-sized parameters from serialization of Table and Partition thrift objects in HMS notifications diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 30ea7f8129..de144259d8 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -512,6 +512,10 @@ public static ConfVars getMetaConf(String name) { "hive.metastore.event.message.factory", "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory", "Factory class for making encoding and decoding messages in the events generated."), + EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS("metastore.notification.parameters.exclude.patterns", + "hive.metastore.notification.parameters.exclude.patterns", "", + "List of comma-separated regexes to match the parameters to be excluded" + + " from Table and Partition parameters in HMS notifications."), EVENT_DB_LISTENER_TTL("metastore.event.db.listener.timetolive", "hive.metastore.event.db.listener.timetolive", 86400, TimeUnit.SECONDS, "time after which events will be removed from the database listener queue"), @@ -1405,6 +1409,21 @@ public static boolean getBoolVar(Configuration conf, ConfVars var) { return val == null ? conf.getBoolean(var.hiveName, (Boolean)var.defaultVal) : Boolean.valueOf(val); } + public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) { + assert var.defaultVal.getClass() == String.class; + String[] result = conf.getTrimmedStrings(var.varname, (String[]) null); + if (result != null) { + return result; + } + if (var.hiveName != null) { + result = conf.getTrimmedStrings(var.hiveName, (String[]) null); + if (result != null) { + return result; + } + } + return org.apache.hadoop.util.StringUtils.getTrimmedStrings((String) var.getDefaultVal()); + } + /** * Set the variable as a boolean * @param conf configuration file to set it in diff --git standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index c681a87a1c..7d0e75c48e 100644 --- standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -56,6 +56,7 @@ import java.util.Map; import java.util.Properties; import java.util.TimeZone; +import java.util.function.Predicate; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -905,4 +906,32 @@ public static boolean isView(Table table) { } return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType()); } + + /** + * filters a given map with predicate provided. All entries of map whose key matches with + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicate Predicate with pattern to filter the map + */ + public static void filterMapWithPredicate(Map map, Predicate predicate) { + if (map == null) { + return; + } + map.entrySet().removeIf(entry -> predicate.test(entry.getKey())); + } + + /** + * filters a given map with list of predicates. All entries of map whose key matches with any + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicates List of predicates with patterns to filter the map + */ + public static void filterMapWithPredicates(Map map, List> predicates) { + if (map == null) { + return; + } + filterMapWithPredicate(map, predicates.stream().reduce(Predicate::or).orElse(x -> false)); + } } diff --git standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java index 2668b05320..5b3ff45ea3 100644 --- standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java +++ standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/messaging/json/JSONMessageFactory.java @@ -19,10 +19,13 @@ package org.apache.hadoop.hive.metastore.messaging.json; +import java.util.Arrays; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -37,6 +40,7 @@ import org.apache.hadoop.hive.metastore.api.SQLUniqueConstraint; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.TxnToWriteId; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.events.AcidWriteEvent; import org.apache.hadoop.hive.metastore.messaging.AbortTxnMessage; import org.apache.hadoop.hive.metastore.messaging.AddForeignKeyMessage; @@ -83,6 +87,9 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Lists; +import static java.util.regex.Pattern.compile; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapWithPredicates; + /** * The JSON implementation of the MessageFactory. Constructs JSON implementations of each * message-type. @@ -93,6 +100,12 @@ private static JSONMessageDeserializer deserializer = new JSONMessageDeserializer(); + private static final List excludePatterns = Arrays.asList( + MetastoreConf.getTrimmedStringsVar(conf, MetastoreConf.ConfVars.EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS)); + + private static final List> paramsFilter = + excludePatterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + @Override public MessageDeserializer getDeserializer() { return deserializer; @@ -295,11 +308,17 @@ static String createCatalogObjJson(Catalog catObj) throws TException { } static String createTableObjJson(Table tableObj) throws TException { + //Note: The parameters of the Table object will be removed in the filter if it matches + // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS + filterMapWithPredicates(tableObj.getParameters(), paramsFilter); TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); return serializer.toString(tableObj, "UTF-8"); } static String createPartitionObjJson(Partition partitionObj) throws TException { + //Note: The parameters of the Partition object will be removed in the filter if it matches + // any pattern provided through EVENT_NOTIFICATION_PARAMETERS_EXCLUDE_PATTERNS + filterMapWithPredicates(partitionObj.getParameters(), paramsFilter); TSerializer serializer = new TSerializer(new TJSONProtocol.Factory()); return serializer.toString(partitionObj, "UTF-8"); } diff --git standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/TestMetaStoreUtils.java standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/TestMetaStoreUtils.java new file mode 100644 index 0000000000..67377a1b70 --- /dev/null +++ standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/common/TestMetaStoreUtils.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import org.junit.Test; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.stream.Collectors; + +import static java.util.regex.Pattern.compile; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.filterMapWithPredicates; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +public class TestMetaStoreUtils { + + @Test + public void TestFilterMapWithPredicates() { + Map testMap = getTestParamMap(); + + List excludePatterns = Arrays.asList("lastDdl", "num"); + testMapFilter(testMap, excludePatterns); + assertFalse(testMap.containsKey("transient_lastDdlTime")); + assertFalse(testMap.containsKey("numFiles")); + assertFalse(testMap.containsKey("numFilesErasureCoded")); + assertFalse(testMap.containsKey("numRows")); + assertEquals(testMap.size(), 6); + + testMap = getTestParamMap(); + excludePatterns = Arrays.asList("^bucket", "ACCURATE$"); + testMapFilter(testMap, excludePatterns); + assertTrue(testMap.containsKey("testBucketing_version")); + assertFalse(testMap.containsKey("bucketing_version")); + assertTrue(testMap.containsKey("COLUMN_STATS_ACCURATED")); + assertFalse(testMap.containsKey("COLUMN_STATS_ACCURATE")); + assertEquals(testMap.size(), 8); + } + + private void testMapFilter(Map testMap, List patterns) { + List> paramsFilter = + patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + filterMapWithPredicates(testMap, paramsFilter); + } + + private Map getTestParamMap() { + return new HashMap(){{ + put("totalSize", "1024"); + put("numRows", "10"); + put("rawDataSize", "3243234"); + put("COLUMN_STATS_ACCURATE", "{\"BASIC_STATS\":\"true\""); + put("COLUMN_STATS_ACCURATED", "dummy"); + put("numFiles", "2"); + put("transient_lastDdlTime", "1537487124"); + put("bucketing_version", "2"); + put("testBucketing_version", "2"); + put("numFilesErasureCoded", "0"); + }}; + } +}