diff --git common/src/java/org/apache/hadoop/hive/common/HiveConfProperties.java common/src/java/org/apache/hadoop/hive/common/HiveConfProperties.java new file mode 100644 index 0000000000..a37b3e7c9f --- /dev/null +++ common/src/java/org/apache/hadoop/hive/common/HiveConfProperties.java @@ -0,0 +1,602 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; +import org.apache.commons.lang3.NotImplementedException; +import java.io.InputStream; +import java.io.PrintStream; +import java.util.Enumeration; +import java.util.HashSet; +import java.util.Properties; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.Map; +import java.util.Iterator; +import java.util.Collections; +import java.util.Collection; +import java.io.Reader; +import java.io.PrintWriter; +import java.io.OutputStream; +import java.io.IOException; + +/** + * Implementation of Properties to save memory. + * When multiple HiveConf objects are created. Most of the time these + * Configuration object have the same base and each session add their own + * Properties to it. With this implementation we can intern the base + * to prevent this overhead. + */ +public class HiveConfProperties extends Properties { + + private Properties interned; + + /** + * We can't allow removing from the interned Properties. + * So to be able to give a correct answer when getting a Property, + * we should store the Properties, and use this for getProperty and size functions. + * Based on how we use HiveConfs, it will only store a couple of values. + */ + private Properties removed; + + //Used for calculating size. + private int duplicatedPropertiesCount; + + private static Interner interner = Interners.newWeakInterner(); + + public HiveConfProperties(Properties p) { + interned = interner.intern(p); + removed = new Properties(); + duplicatedPropertiesCount=0; + } + + /************* Public API of java.util.Properties ************/ + + /** + * If non-interned (super) contains return that value. + * if not return the value from the base (interned), if it is not null. + */ + @Override + public String getProperty(String key) { + String property = super.getProperty(key); + if (property == null) { + //If it is not in the removed, then the Property is still valid (not removed). + if(interned != null && !removed.containsKey(key)) { + property = interned.getProperty(key); + } + } + return property; + } + + /** + * If non-interned (super) contains, return that value. + * - we can't use the super's getProperty(key, defaulValue) + * otherwise it'd return the defaultValue instead of the value from interned - + * if not return the value from the base (interned), if it is not null. + */ + @Override + public String getProperty(String key, String defaultValue) { + String property = super.getProperty(key); + if (property == null) { + //If it is not in the removed, then the Property is still valid (not removed). + if(interned != null && !removed.containsKey(key)) { + property = interned.getProperty(key, defaultValue); + } + } + return property; + } + + /** + * Setting the Property in non-interned (super). + * Duplicates can happen this way: if interned already contains a property + * that is being added in the non-interned, there will be 2 properties with the same key + * However this will not be a problem, because of the way getProperty works + * (We cannot remove it from interned, since other HiveConfs use the same. + */ + @Override + public synchronized Object setProperty(String key, String value) { + if(interned != null && interned.containsKey(key)){ + duplicatedPropertiesCount++; + } + return super.setProperty(key, value); + } + + /** + * Merge the keys of the two parts (interned, non-interned) and return. + */ + @Override + public Set stringPropertyNames() { + Set keys = new HashSet<>(); + if(interned != null) { + Iterator it = interned.keySet().iterator(); + while (it.hasNext()) { + String key = (String)it.next(); + //If it was removed, don't return with it + if(!removed.containsKey(key)) { + keys.add(key); + } + } + } + keys.addAll(super.stringPropertyNames()); + return keys; + } + + /** + * The same as HashTable's keys function. + */ + @Override + public Enumeration propertyNames() { + return keys(); + } + + /************* Not used java.util.Properties functions in Configuration class. ************/ + + @Override + public void list(PrintStream out) { + throw new NotImplementedException("HiveConfProperties.list not implemented"); + } + + @Override + public void list(PrintWriter out) { + throw new NotImplementedException("HiveConfProperties.list not implemented"); + } + + @Override + public synchronized void load(InputStream inStream) throws IOException { + throw new NotImplementedException("HiveConfProperties.load not implemented"); + } + + @Override + public synchronized void load(Reader reader) throws IOException { + throw new NotImplementedException("HiveConfProperties.load not implemented"); + } + + @Override + public synchronized void loadFromXML(InputStream inStream) throws IOException { + throw new NotImplementedException("HiveConfProperties.loadFromXML not implemented"); + } + + @Override + public void store(OutputStream out, String comments) throws IOException { + throw new NotImplementedException("HiveConfProperties.store not implemented"); + } + + @Override + public void storeToXML(OutputStream os, String comment) throws IOException { + throw new NotImplementedException("HiveConfProperties.storeToXML not implemented"); + } + + @Override + public void storeToXML(OutputStream os, String comment, String encoding) + throws IOException { + throw new NotImplementedException("HiveConfProperties.storeToXML not implemented"); + } + + /************* Public API of java.util.Hashtable ************/ + + /** + * Size of HiveConfProperties is the size of interned + size of non-interned - duplicatedPropertiesCount. + * We have to subtract the duplicatedPropertiesCount, because duplicates can happen: + * If we set a property which is already in the interned, we have a duplicate. + * Also we have to subtract the size of the removed Properties, since those had been removed + */ + @Override + public synchronized int size() { + if(interned != null) { + return super.size() + interned.size() - duplicatedPropertiesCount - removed.size(); + } + return super.size(); + } + + /** + * If non-interned (super) contains return that value. + * If not return the value from the base (interned), if it is not null. + */ + @Override + public synchronized Object get(Object key) { + Object o = super.get(key); + if (o == null) { + if(interned != null && !removed.containsKey(key)) { + o = interned.get(key); + } + } + return o; + } + + /** + * If non-interned (super) contains return that value. + * - we can't use the super's get(key, defaultValue) + * otherwise it'd return the defaultValue instead of the value from interned - + * If not return the value from the base (interned), if it is not null. + */ + @Override + public synchronized Object getOrDefault(Object key, Object defaultValue) { + Object property = super.get(key); + if (property == null) { + if(interned != null && !removed.containsKey(key)) { + property = interned.getOrDefault(key, defaultValue); + } + } + return property; + } + + /** + * Merge the two Properties (interned, non-interned) with putAll and return the merged Properties. + */ + @Override + public synchronized Object clone() { + Properties properties = new Properties(); + if(interned != null) { + Iterator it = interned.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry pair = (Map.Entry)it.next(); + if(!removed.containsKey(pair.getKey())) { + properties.setProperty((String) pair.getKey(), (String) pair.getValue()); + } + } + } + properties.putAll(this); + return properties; + } + + /** + * Return true if non-interned or interned contains the key. + */ + @Override + public synchronized boolean containsKey(Object key) { + if(interned != null) { + return super.containsKey(key) || interned.containsKey(key); + } + return super.containsKey(key); + } + + /** + * Return true if non-interned or interned contains the value. + */ + @Override + public synchronized boolean containsValue(Object value) { + if(interned != null) { + return super.containsValue(value) || interned.containsValue(value); + } + return super.containsValue(value); + } + + @Override + public synchronized int hashCode() { + if(interned != null) { + return super.hashCode() & interned.hashCode(); + } + return super.hashCode(); + } + + /** + * Implementation of equals follows the logic of the equals implementation in HashTable. + * But we need to divide into two parts: when other is HiveConfProperties and when not. + * If other is HiveConfProperties -> need to check the interned for equality as well. + */ + @Override + public synchronized boolean equals(Object o) { + if (o == this) { + return true; + } + + if (!(o instanceof Map)) { + return false; + } + Map t = (Map) o; + //If other is HiveConfProperties we need to check the interned Properties as well. + if (t instanceof HiveConfProperties) { + HiveConfProperties otherHiveConfProperties = (HiveConfProperties) t; + //If sizes do not match => they are not equals + if (otherHiveConfProperties.size() != this.size()) { + return false; + } + try { + Iterator> i = super.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = i.next(); + String key = (String) e.getKey(); + String value = (String) e.getValue(); + //If value is null "other" Properties shouldn't contain key + if (value == null) { + //Note that HiveConfProperties.get will be called so interned is counted + if (!(otherHiveConfProperties.get(key) == null && otherHiveConfProperties.containsKey(key))) { + return false; + } + } else { + //If value is not null, value from this should be equal to value from other + if (!value.equals(otherHiveConfProperties.get(key))) { + return false; + } + } + } + //Going through this.interned Properties as well + if(interned != null) { + i = interned.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = i.next(); + String key = (String) e.getKey(); + String value = (String) e.getValue(); + if (value == null) { + //If value is null other Properties shouldn't contain key + if (!(otherHiveConfProperties.get(key) == null && otherHiveConfProperties.containsKey(key))) { + //We don't need to store in equals, we can instantly return false, since they can't be equal + return false; + } + } else { + if (!value.equals(otherHiveConfProperties.get(key))) { + return false; + } + } + } + } + } catch (ClassCastException unused) { + return false; + } catch (NullPointerException unused) { + return false; + } + //If we didn't return false until this point => this and other are equals + return true; + } else { + //other is not HiveConfProperties => not divided into 2 parts + if (t.size() != this.size()) { + return false; + } + try { + //Iterating through other instead of this => no need to merge non-interned and interned Properties + Iterator> i = t.entrySet().iterator(); + while (i.hasNext()) { + Map.Entry e = i.next(); + String key = (String) e.getKey(); + String value = (String) e.getValue(); + if (value == null) { + if (!(this.get(key) == null && this.containsKey(key))) { + return false; + } + } else { + if (!value.equals(this.get(key))) { + return false; + } + } + } + } catch (ClassCastException unused) { + return false; + } catch (NullPointerException unused) { + return false; + } + return true; + } + + } + + /** + * Iterating through both interned and non-interned Properties and collect keys. + */ + @Override + public synchronized Enumeration keys() { + Set keys = new HashSet<>(); + Iterator it; + if(interned != null) { + it = interned.keySet().iterator(); + while (it.hasNext()) { + String key = (String)it.next(); + //If it was removed, don't return with it + if(removed != null && !removed.containsKey(key)) { + keys.add(key); + } + } + } + if (super.keySet() != null) { + it = super.keySet().iterator(); + while (it.hasNext()) { + String key = (String)it.next(); + keys.add(key); + } + } + return Collections.enumeration(keys); + } + + /** + * Merging interned and non-interned keys into a HashSet. + */ + @Override + public Set keySet() { + Set keys = new HashSet<>(); + if(interned != null) { + Iterator it = interned.keySet().iterator(); + while (it.hasNext()) { + String key = (String)it.next(); + //If it was removed, don't return with it + if(!removed.containsKey(key)) { + keys.add(key); + } + } + } + keys.addAll(super.keySet()); + return keys; + } + + /** + * Merging interned and non-interned entries into a HashSet. + */ + @Override + public Set> entrySet() { + Set> entries = new HashSet<>(); + if(interned != null) { + Iterator it = interned.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry pair = (Map.Entry)it.next(); + if(!removed.containsKey(pair.getKey())) { + entries.add(pair); + } + } + } + entries.addAll(super.entrySet()); + return entries; + } + + /** + * Putting the Property in non-interned (super). + * Duplicates can happen this way: if interned already contains a property + * that is being added in the non-interned, there will be 2 properties with the same key + * However this will not be a problem, because of the way getProperty works + * (We cannot remove it from interned, since other HiveConfs use the same. + */ + @Override + public synchronized Object put(Object key, Object value) { + return super.put(key, value); + } + + /** + * Putting the properties in non-interned (super). + * Duplicates can happen this way: if interned already contains some properties + * that are being added in the non-interned, there will be duplicate with the same key + * However this will not be a problem, because of the way getProperty works + * (We cannot remove it from interned, since other HiveConfs use the same + */ + @Override + public synchronized void putAll(Map t) { + super.putAll(t); + } + + /** + * We can't remove it from the interned Properties (other HiveConf-s may use it). + * First we copy the content of the interned Properties to the non-interned (this), + * then we set the interned to null. At this point we can remove, since it won't affect + * other HiveConfProperties. + */ + @Override + public synchronized Object remove(Object key) { + if(interned != null) { + if (interned.containsKey(key)) { + //We can't remove from interned + String v = interned.getProperty((String) key); + removed.setProperty((String) key, v); + return v; + } + } + return super.remove(key); + } + + /** + * We can't remove it from the interned Properties (other HiveConf-s may use it). + * First we copy the content of the interned Properties to the non-interned (this), + * then we set the interned to null. At this point we can remove, since it won't affect + * other HiveConfProperties. + */ + @Override + public synchronized boolean remove(Object key, Object value) { + if(interned != null) { + if (interned.containsKey(key)) { + //We can't remove from interned + removed.setProperty((String) key, (String) value); + return true; + } + } + return super.remove(key, value); + } + + /** + * If we want to clear, we can't empty the interned Properties (other HiveConf's may use it). + * so we just set it to null. + */ + @Override + public synchronized void clear() { + super.clear(); + removed.clear(); + interned = null; + duplicatedPropertiesCount=0; + } + + /************* Not used java.util.HashTable functions in Configuration class. ************/ + + @Override + public synchronized boolean contains(Object value) { + throw new NotImplementedException("HiveConfProperties.contains not implemented"); + } + + @Override + public synchronized boolean isEmpty() { + throw new NotImplementedException("HiveConfProperties.isEmpty not implemented"); + } + + @Override + public synchronized Object compute(Object key, BiFunction remappingFunction) { + throw new NotImplementedException("HiveConfProperties.compute not implemented"); + } + + @Override + public synchronized Object computeIfAbsent(Object key, Function mappingFunction) { + throw new NotImplementedException("HiveConfProperties.computeIfAbsent not implemented"); + } + + @Override + public synchronized Object computeIfPresent(Object key, BiFunction remappingFunction) { + throw new NotImplementedException("HiveConfProperties.computeIfPresent not implemented"); + } + + @Override + public synchronized Enumeration elements() { + throw new NotImplementedException("HiveConfProperties.elements not implemented"); + } + + @Override + public synchronized void forEach(BiConsumer action) { + throw new NotImplementedException("HiveConfProperties.foreach not implemented"); + } + + @Override + public synchronized Object merge(Object key, Object value, BiFunction remappingFunction) { + throw new NotImplementedException("HiveConfProperties.merge not implemented"); + } + + @Override + public synchronized Object putIfAbsent(Object key, Object value) { + throw new NotImplementedException("HiveConfProperties.putIfAbsent not implemented"); + } + + @Override + public synchronized Object replace(Object key, Object value) { + throw new NotImplementedException("HiveConfProperties.replace not implemented"); + } + + @Override + public synchronized boolean replace(Object key, Object oldValue, Object newValue) { + throw new NotImplementedException("HiveConfProperties.replace not implemented"); + } + + @Override + public synchronized void replaceAll(BiFunction function) { + throw new NotImplementedException("HiveConfProperties.replaceAll not implemented"); + } + + @Override + public synchronized String toString() { + throw new NotImplementedException("HiveConfProperties.toString not implemented"); + } + + @Override + public Collection values() { + throw new NotImplementedException("HiveConfProperties.values not implemented"); + } +} + diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 65264f323f..39b6ea49fa 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -22,8 +22,10 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang.reflect.FieldUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.FileUtils; +import org.apache.hadoop.hive.common.HiveConfProperties; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; import org.apache.hadoop.hive.common.type.TimestampTZUtil; @@ -50,6 +52,7 @@ import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; +import java.lang.reflect.Field; import java.net.URI; import java.net.URL; import java.net.URLDecoder; @@ -5300,6 +5303,16 @@ public HiveConf(HiveConf other) { restrictList.addAll(other.restrictList); hiddenSet.addAll(other.hiddenSet); modWhiteListPattern = other.modWhiteListPattern; + + try { + Field propertiesField = FieldUtils.getDeclaredField(HiveConf.class.getSuperclass(), "properties", true); + if(!(propertiesField.get(other) instanceof HiveConfProperties)) { + HiveConfProperties properties = new HiveConfProperties((Properties) propertiesField.get(this)); + FieldUtils.writeField(this, "properties", properties, true); + } + } catch (IllegalAccessException e) { + e.printStackTrace(); + } } public Properties getAllProperties() { diff --git common/src/test/org/apache/hadoop/hive/conf/TestHiveConfProperties.java common/src/test/org/apache/hadoop/hive/conf/TestHiveConfProperties.java new file mode 100644 index 0000000000..4f8bff5cff --- /dev/null +++ common/src/test/org/apache/hadoop/hive/conf/TestHiveConfProperties.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.conf; + +import org.apache.hadoop.hive.common.HiveConfProperties; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.Properties; + +/** + * Testing HiveConfProperties class. + */ +public class TestHiveConfProperties { + + private Properties propertiesBase; + private HiveConfProperties hiveConfProperties; + + @Before + public void init(){ + //Create Base properties + propertiesBase = new Properties(); + //Create HiveConfProperties, where propertiesBase will be interned + propertiesBase.setProperty("testKey1", "testValue1"); + hiveConfProperties = new HiveConfProperties(propertiesBase); + //adding a new Property so we have a non-interned value + hiveConfProperties.setProperty("testKey2", "testValue2"); + } + + @Test + public void testEquals(){ + //HiveConfProperties - HiveConfProperties: true + HiveConfProperties hiveConfProperties1 = new HiveConfProperties(propertiesBase); + hiveConfProperties1.setProperty("testKey2", "testValue2"); + Assert.assertTrue(hiveConfProperties.equals(hiveConfProperties1)); + + //HiveConfProperties - Properties: true + Properties properties2 = new Properties(); + properties2.setProperty("testKey2", "testValue2"); + properties2.setProperty("testKey1", "testValue1"); + Assert.assertTrue(hiveConfProperties.equals(properties2)); + + //Properties - HiveConfProperties: true + Assert.assertTrue(properties2.equals(hiveConfProperties)); + + //HiveConfProperties - Properties: False + Properties properties3 = new Properties(); + properties3.setProperty("testKey2", "testValue2"); + properties3.setProperty("testKey1", "testValue1"); + properties3.setProperty("testKey3", "testValue3"); + Assert.assertFalse(hiveConfProperties.equals(properties3)); + + //HiveConfProperties - HiveConfProperties from other base: True + Properties propertiesBase2 = new Properties(); + propertiesBase2.setProperty("testKey1", "testValue1"); + HiveConfProperties hiveConfProperties2 = new HiveConfProperties(propertiesBase2); + hiveConfProperties2.setProperty("testKey2", "testValue2"); + Assert.assertTrue(hiveConfProperties.equals(hiveConfProperties2)); + + //HiveConfProperties - HiveConfProperties from other base: False + propertiesBase2.setProperty("testKey3", "testValue3"); + HiveConfProperties hiveConfProperties3 = new HiveConfProperties(propertiesBase2); + Assert.assertFalse(hiveConfProperties3.equals(hiveConfProperties)); + + //HiveConfProperties - HiveConfProperties adding new value: False + hiveConfProperties1.setProperty("testKey3", "testValue3"); + Assert.assertFalse(hiveConfProperties.equals(hiveConfProperties1)); + + } + + @Test + public void testClone(){ + Properties properties2 = (Properties)hiveConfProperties.clone(); + Assert.assertTrue(hiveConfProperties.equals(properties2)); + } + + @Test + public void testContainsKey(){ + Assert.assertTrue(hiveConfProperties.containsKey("testKey1")); + Assert.assertTrue(hiveConfProperties.containsKey("testKey2")); + Assert.assertFalse(hiveConfProperties.containsKey("testKey3")); + } + + @Test + public void testGetProperty(){ + Assert.assertEquals("testValue1", hiveConfProperties.getProperty("testKey1")); + Assert.assertEquals("testValue2", hiveConfProperties.getProperty("testKey2")); + } + + @Test + public void testClear(){ + hiveConfProperties.clear(); + Assert.assertEquals(0, hiveConfProperties.size()); + + hiveConfProperties.setProperty("testkey1", "testValue1"); + Assert.assertEquals("testValue1", hiveConfProperties.getProperty("testkey1")); + } + + @Test + public void testRemove() { + HiveConfProperties hiveConfProperties2 = new HiveConfProperties(propertiesBase); + + //Adding a Property that interned already contains, + //to check if size is correct after removing a Property that is duplicated + hiveConfProperties.setProperty("testKey1", "testValue1"); + + //Removing from interned + hiveConfProperties.remove("testKey1"); + Assert.assertEquals(1, hiveConfProperties.size()); + + //After removal, hiveConfProperties2 should still contain the previously removed Property + Assert.assertTrue(hiveConfProperties2.containsKey("testKey1")); + Properties p = new Properties(); + p.setProperty("testKey2", "testValue2"); + + Assert.assertTrue(hiveConfProperties.equals(p)); + } + + @Test + public void testRemove2() { + //Removing from non-interned + hiveConfProperties.remove("testKey2", "testValue2"); + Assert.assertEquals(1, hiveConfProperties.size()); + + Properties p = new Properties(); + p.setProperty("testKey1", "testValue1"); + + Assert.assertTrue(hiveConfProperties.equals(p)); + } +}