diff --git common/src/java/org/apache/hadoop/hive/common/HiveConfProperties.java common/src/java/org/apache/hadoop/hive/common/HiveConfProperties.java new file mode 100644 index 0000000000..55ef446830 --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/HiveConfProperties.java @@ -0,0 +1,501 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; +import org.apache.commons.lang3.NotImplementedException; +import java.io.InputStream; +import java.io.PrintStream; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.Properties; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.Map; +import java.util.Iterator; +import java.util.Collections; +import java.util.Collection; +import java.io.Reader; +import java.io.PrintWriter; +import java.io.OutputStream; +import java.io.IOException; + +/** + * Implementation of Properties to save memory. + * When multiple HiveConf objects are created. Most of the time these + * Configuration object have the same base and each session add their own + * Properties to it. With this implementation we can intern the base + * to prevent this overhead. + */ +public class HiveConfProperties extends Properties { + + private final Properties interned; + + //Used for calculating size. + private int duplicatedPropertiesCount; + + private static Interner interner = Interners.newWeakInterner(); + + public HiveConfProperties(Properties p) { + interned = interner.intern(p); + duplicatedPropertiesCount=0; + } + + /************* Public API of java.util.Properties ************/ + + /** + * If non-interned (super) contains return that value. + * if not return the value from the base (interned). + */ + @Override + public String getProperty(String key) { + String property = super.getProperty(key); + if (property == null) { + property = interned.getProperty(key); + } + return property; + } + + /** + * If non-interned (super) contains return that value. + * - we can't use the super's getProperty(key, defaulValue) + * otherwise it'd return the defaultValue instead of the value from interned - + * if not return the value from the base (interned). + */ + @Override + public String getProperty(String key, String defaultValue) { + String property = super.getProperty(key); + if (property == null) { + property = interned.getProperty(key, defaultValue); + } + return property; + } + + /** + * Setting the Property in non-interned (super). + * Duplicates can happen this way: if interned already contains a property + * that is being added in the non-interned, there will be 2 properties with the same key + * However this will not be a problem, because of the way getProperty works + * (We cannot remove it from interned, since other HiveConfs use the same. + */ + @Override + public synchronized Object setProperty(String key, String value) { + if(interned.containsKey(key)){ + duplicatedPropertiesCount++; + } + return super.setProperty(key, value); + } + + /** + * Merge the keys of the two parts (interned, non-interned) and return. + */ + @Override + public Set stringPropertyNames() { + Set keys = new HashSet<>(); + keys.addAll(interned.stringPropertyNames()); + keys.addAll(super.stringPropertyNames()); + return keys; + } + + /** + * The same as HashTable's keys function. + */ + @Override + public Enumeration propertyNames() { + return keys(); + } + + /************* Not used java.util.Properties functions in Configuration class. ************/ + + @Override + public void list(PrintStream out) { + throw new NotImplementedException("HiveConfProperties.list not implemented"); + } + + @Override + public void list(PrintWriter out) { + throw new NotImplementedException("HiveConfProperties.list not implemented"); + } + + @Override + public synchronized void load(InputStream inStream) throws IOException { + throw new NotImplementedException("HiveConfProperties.load not implemented"); + } + + @Override + public synchronized void load(Reader reader) throws IOException { + throw new NotImplementedException("HiveConfProperties.load not implemented"); + } + + @Override + public synchronized void loadFromXML(InputStream inStream) throws IOException { + throw new NotImplementedException("HiveConfProperties.loadFromXML not implemented"); + } + + @Override + public void store(OutputStream out, String comments) throws IOException { + throw new NotImplementedException("HiveConfProperties.store not implemented"); + } + + @Override + public void storeToXML(OutputStream os, String comment) throws IOException { + throw new NotImplementedException("HiveConfProperties.storeToXML not implemented"); + } + + @Override + public void storeToXML(OutputStream os, String comment, String encoding) + throws IOException { + throw new NotImplementedException("HiveConfProperties.storeToXML not implemented"); + } + + /************* Public API of java.util.Hashtable ************/ + + /** + * Size of HiveConfProperties is the size of interned + size of non-interned - duplicatedPropertiesCount. + * We have to subtract the duplicatedPropertiesCount, because duplicates can happen: + * If we set a property which is already in the interned, we have a duplicate. + */ + @Override + public synchronized int size() { + return super.size() + interned.size() - duplicatedPropertiesCount; + } + + /** + * If non-interned (super) contains return that value. + * If not return the value from the base (interned). + */ + @Override + public synchronized Object get(Object key) { + Object o = super.get(key); + if (o == null) { + o = interned.get(key); + } + return o; + } + + /** + * If non-interned (super) contains return that value. + * - we can't use the super's get(key, defaultValue) + * otherwise it'd return the defaultValue instead of the value from interned - + * If not return the value from the base (interned). + */ + @Override + public synchronized Object getOrDefault(Object key, Object defaultValue) { + Object property = super.get(key); + if (property == null) { + property = interned.get(key); + } + return property == null ? defaultValue : property; + } + + /** + * Merge the two Properties (interned, non-interned) with putAll and return the merged Properties. + */ + @Override + public synchronized Object clone() { + Properties properties = new Properties(); + properties.putAll(this); + properties.putAll(interned); + return properties; + } + + /** + * Return true if non-interned or interned contains the value. + */ + @Override + public synchronized boolean contains(Object value) { + return super.contains(value) || interned.contains(value); + } + + /** + * Return true if non-interned or interned contains the key. + */ + @Override + public synchronized boolean containsKey(Object key) { + return super.containsKey(key) || interned.containsKey(key); + } + + /** + * Return true if non-interned or interned contains the value. + */ + @Override + public synchronized boolean containsValue(Object value) { + return super.containsValue(value) || interned.containsValue(value); + } + + @Override + public synchronized int hashCode() { + return super.hashCode() & interned.hashCode(); + } + + /** + * Implementation of equals follows the logic of the equals implementation in HashTable. + * But we need to divide into two parts: when other is HiveConfProperties and when not. + * If other is HiveConfProperties -> need to check the interned for equality as well. + */ + @Override + public synchronized boolean equals(Object o) { + if (o == this) { + return true; + } + + if (!(o instanceof Map)) { + return false; + } + Map t = (Map) o; + //If other is HiveConfProperties we need to check the interned Properties as well. + if (t instanceof HiveConfProperties) { + HiveConfProperties otherHiveConfProperties = (HiveConfProperties) t; + //If sizes do not match => they are not equals + if (otherHiveConfProperties.size() != this.size()) { + return false; + } + try { + Iterator> i = super.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = i.next(); + String key = (String) e.getKey(); + String value = (String) e.getValue(); + //If value is null "other" Properties shouldn't contain key + if (value == null) { + //Note that HiveConfProperties.get will be called so interned is counted + if (!(otherHiveConfProperties.get(key) == null && otherHiveConfProperties.containsKey(key))) { + return false; + } + } else { + //If value is not null, value from this should be equal to value from other + if (!value.equals(otherHiveConfProperties.get(key))) { + return false; + } + } + } + //Going through this.interned Properties as well + i = interned.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = i.next(); + String key = (String) e.getKey(); + String value = (String) e.getValue(); + if (value == null) { + //If value is null other Properties shouldn't contain key + if (!(otherHiveConfProperties.get(key) == null && otherHiveConfProperties.containsKey(key))) { + //We don't need to store in equals, we can instantly return false, since they can't be equal + return false; + } + } else { + if (!value.equals(otherHiveConfProperties.get(key))) { + return false; + } + } + } + } catch (ClassCastException unused) { + return false; + } catch (NullPointerException unused) { + return false; + } + //If we didn't return false until this point => this and other are equals + return true; + } else { + //other is not HiveConfProperties => not divided into 2 parts + if (t.size() != this.size()) { + return false; + } + try { + //Iterating through other instead of this => no need to merge non-interned and interned Properties + Iterator> i = t.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = i.next(); + String key = (String) e.getKey(); + String value = (String) e.getValue(); + if (value == null) { + if (!(this.get(key) == null && this.containsKey(key))) { + return false; + } + } else { + if (!value.equals(this.get(key))) { + return false; + } + } + } + } catch (ClassCastException unused) { + return false; + } catch (NullPointerException unused) { + return false; + } + return true; + } + + } + + /** + * Return true if non-interned and interned is empty. + */ + @Override + public synchronized boolean isEmpty() { + return super.isEmpty() && interned.isEmpty(); + } + + /** + * Iterating through both interned and non-interned Properties and collect keys. + */ + @Override + public synchronized Enumeration keys() { + Set keys = new HashSet<>(); + Iterator it; + if (super.entrySet() != null) { + it = super.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry entry = (Map.Entry) it.next(); + keys.add(entry.getKey()); + } + } + it = interned.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry entry = (Map.Entry) it.next(); + keys.add(entry.getKey()); + } + return Collections.enumeration(keys); + } + + /** + * Merging interned and non-interned keys into a HashSet. + */ + @Override + public Set keySet() { + Set keys = new HashSet<>(); + keys.addAll(super.keySet()); + keys.addAll(interned.keySet()); + return keys; + } + + /** + * Merging interned and non-interned entries into a HashSet. + */ + @Override + public Set> entrySet() { + Set> entries = new HashSet<>(); + entries.addAll(interned.entrySet()); + entries.addAll(super.entrySet()); + return entries; + } + + /** + * Putting the Property in non-interned (super). + * Duplicates can happen this way: if interned already contains a property + * that is being added in the non-interned, there will be 2 properties with the same key + * However this will not be a problem, because of the way getProperty works + * (We cannot remove it from interned, since other HiveConfs use the same. + */ + @Override + public synchronized Object put(Object key, Object value) { + return super.put(key, value); + } + + /** + * Putting the properties in non-interned (super). + * Duplicates can happen this way: if interned already contains some properties + * that are being added in the non-interned, there will be duplicate with the same key + * However this will not be a problem, because of the way getProperty works + * (We cannot remove it from interned, since other HiveConfs use the same + */ + @Override + public synchronized void putAll(Map t) { + super.putAll(t); + } + + /************* Not used java.util.HashTable functions in Configuration class. ************/ + + @Override + public synchronized void clear() { + throw new NotImplementedException("HiveConfProperties.clear not implemented"); + } + + @Override + public synchronized Object compute(Object key, BiFunction remappingFunction) { + throw new NotImplementedException("HiveConfProperties.compute not implemented"); + } + + @Override + public synchronized Object computeIfAbsent(Object key, Function mappingFunction) { + throw new NotImplementedException("HiveConfProperties.computeIfAbsent not implemented"); + } + + @Override + public synchronized Object computeIfPresent(Object key, BiFunction remappingFunction) { + throw new NotImplementedException("HiveConfProperties.computeIfPresent not implemented"); + } + + @Override + public synchronized Enumeration elements() { + throw new NotImplementedException("HiveConfProperties.elements not implemented"); + } + + @Override + public synchronized void forEach(BiConsumer action) { + throw new NotImplementedException("HiveConfProperties.foreach not implemented"); + } + + @Override + public synchronized Object merge(Object key, Object value, BiFunction remappingFunction) { + throw new NotImplementedException("HiveConfProperties.merge not implemented"); + } + + @Override + public synchronized Object putIfAbsent(Object key, Object value) { + throw new NotImplementedException("HiveConfProperties.putIfAbsent not implemented"); + } + + @Override + public synchronized Object remove(Object key) { + throw new NotImplementedException("HiveConfProperties.remove not implemented"); + } + + @Override + public synchronized boolean remove(Object key, Object value) { + throw new NotImplementedException("HiveConfProperties.remove not implemented"); + } + + @Override + public synchronized Object replace(Object key, Object value) { + throw new NotImplementedException("HiveConfProperties.replace not implemented"); + } + + @Override + public synchronized boolean replace(Object key, Object oldValue, Object newValue) { + throw new NotImplementedException("HiveConfProperties.replace not implemented"); + } + + @Override + public synchronized void replaceAll(BiFunction function) { + throw new NotImplementedException("HiveConfProperties.replaceAll not implemented"); + } + + @Override + public synchronized String toString() { + throw new NotImplementedException("HiveConfProperties.toString not implemented"); + } + + @Override + public Collection values() { + throw new NotImplementedException("HiveConfProperties.values not implemented"); + } +} + diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 917aaeb433..40ba7d721a 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -22,8 +22,10 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.reflect.FieldUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.HiveConfProperties; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.type.TimestampTZUtil; @@ -50,6 +52,7 @@ import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Field; import java.net.URI; import java.net.URL; import java.net.URLDecoder; @@ -5296,6 +5299,16 @@ public HiveConf(HiveConf other) { restrictList.addAll(other.restrictList); hiddenSet.addAll(other.hiddenSet); modWhiteListPattern = other.modWhiteListPattern; + + try { + Field propertiesField = FieldUtils.getDeclaredField(HiveConf.class.getSuperclass(), "properties", true); + if(!(propertiesField.get(other) instanceof HiveConfProperties)) { + HiveConfProperties properties = new HiveConfProperties((Properties) propertiesField.get(this)); + FieldUtils.writeField(this, "properties", properties, true); + } + } catch (IllegalAccessException e) { + e.printStackTrace(); + } } public Properties getAllProperties() { diff --git common/src/test/org/apache/hadoop/hive/conf/TestHiveConfProperties.java common/src/test/org/apache/hadoop/hive/conf/TestHiveConfProperties.java new file mode 100644 index 0000000000..05813ac179 --- /dev/null +++ common/src/test/org/apache/hadoop/hive/conf/TestHiveConfProperties.java @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.conf; + +import org.apache.hadoop.hive.common.HiveConfProperties; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Properties; + +/** + * Testing HiveConfProperties class. + */ +public class TestHiveConfProperties { + + private Properties propertiesBase; + private HiveConfProperties hiveConfProperties; + + @Before + public void init(){ + //Create Base properties + propertiesBase = new Properties(); + //Create HiveConfProperties, where propertiesBase will be interned + propertiesBase.setProperty("testKey1", "testValue1"); + hiveConfProperties = new HiveConfProperties(propertiesBase); + //adding a new Property so we have a non-interned value + hiveConfProperties.setProperty("testKey2", "testValue2"); + } + + @Test + public void testEquals(){ + //HiveConfProperties - HiveConfProperties: true + HiveConfProperties hiveConfProperties1 = new HiveConfProperties(propertiesBase); + hiveConfProperties1.setProperty("testKey2", "testValue2"); + Assert.assertTrue(hiveConfProperties.equals(hiveConfProperties1)); + + //HiveConfProperties - Properties: true + Properties properties2 = new Properties(); + properties2.setProperty("testKey2", "testValue2"); + properties2.setProperty("testKey1", "testValue1"); + Assert.assertTrue(hiveConfProperties.equals(properties2)); + + //Properties - HiveConfProperties: true + Assert.assertTrue(properties2.equals(hiveConfProperties)); + + //HiveConfProperties - Properties: False + Properties properties3 = new Properties(); + properties3.setProperty("testKey2", "testValue2"); + properties3.setProperty("testKey1", "testValue1"); + properties3.setProperty("testKey3", "testValue3"); + Assert.assertFalse(hiveConfProperties.equals(properties3)); + + //HiveConfProperties - HiveConfProperties from other base: True + Properties propertiesBase2 = new Properties(); + propertiesBase2.setProperty("testKey1", "testValue1"); + HiveConfProperties hiveConfProperties2 = new HiveConfProperties(propertiesBase2); + hiveConfProperties2.setProperty("testKey2", "testValue2"); + Assert.assertTrue(hiveConfProperties.equals(hiveConfProperties2)); + + //HiveConfProperties - HiveConfProperties from other base: False + propertiesBase2.setProperty("testKey3", "testValue3"); + HiveConfProperties hiveConfProperties3 = new HiveConfProperties(propertiesBase2); + Assert.assertFalse(hiveConfProperties3.equals(hiveConfProperties)); + + //HiveConfProperties - HiveConfProperties adding new value: False + hiveConfProperties1.setProperty("testKey3", "testValue3"); + Assert.assertFalse(hiveConfProperties.equals(hiveConfProperties1)); + + } + + @Test + public void testClone(){ + Properties properties2 = (Properties)hiveConfProperties.clone(); + Assert.assertTrue(hiveConfProperties.equals(properties2)); + } + + @Test + public void testContainsKey(){ + Assert.assertTrue(hiveConfProperties.containsKey("testKey1")); + Assert.assertTrue(hiveConfProperties.containsKey("testKey2")); + Assert.assertFalse(hiveConfProperties.containsKey("testKey3")); + } + + @Test + public void testGetProperty(){ + Assert.assertEquals("testValue1", hiveConfProperties.getProperty("testKey1")); + Assert.assertEquals("testValue2", hiveConfProperties.getProperty("testKey2")); + } +}