diff --git a/common/src/java/org/apache/hadoop/hive/common/LRUCache.java b/common/src/java/org/apache/hadoop/hive/common/LRUCache.java new file mode 100644 index 0000000..3d34a90 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/LRUCache.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import java.util.LinkedHashMap; +import java.util.Map; + +/** + * LRU cache implementation based on LinkedHashMap. + */ +public class LRUCache extends LinkedHashMap implements Map{ + + private static final long serialVersionUID = 1L; + + static final float DEFAULT_LOAD_FACTOR = 0.75f; + static final int DEFAULT_INITIAL_CAPACITY = 16; + + protected int maxSize; + + public LRUCache(int maxSize) { + this(maxSize, DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR); + } + + public LRUCache(int maxSize, int initialCapacity, float loadFactor) { + // Set LinkedHashMap with access-order for LRU + super(initialCapacity, loadFactor, true); + this.maxSize = maxSize; + } + + @Override + protected boolean removeEldestEntry(Map.Entry eldest) { + // LRU - most stale entry should be removed if cache is full. + return size() > maxSize; + } +} diff --git a/common/src/test/org/apache/hadoop/hive/common/TestLRUCache.java b/common/src/test/org/apache/hadoop/hive/common/TestLRUCache.java new file mode 100644 index 0000000..16f46f3 --- /dev/null +++ b/common/src/test/org/apache/hadoop/hive/common/TestLRUCache.java @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common; + +import static org.junit.Assert.*; + +import java.util.Map; + +import org.junit.Test; + +public class TestLRUCache { + @Test + public void testCache() throws Exception { + Map cache = new LRUCache(5); + + assertNull(null, cache.get("a")); + cache.put("a", "aaa"); + assertEquals("aaa", cache.get("a")); + + assertNull(null, cache.get("b")); + cache.put("b", "bbb"); + assertEquals("aaa", cache.get("a")); + assertEquals("bbb", cache.get("b")); + + // Replacement + cache.put("a", "abc"); + assertEquals(2, cache.size()); + assertEquals("abc", cache.get("a")); + } + + @Test + public void testEviction() throws Exception { + Map cache = new LRUCache(3); + + assertEquals(0, cache.size()); + cache.put("a", "aaa"); + cache.put("b", "bbb"); + cache.put("c", "ccc"); + assertEquals(3, cache.size()); + + // Cannot exceed cache size + cache.put("d", "ddd"); + assertEquals(3, cache.size()); + // verify contents + assertNull(cache.get("a")); + assertEquals("bbb", cache.get("b")); + assertEquals("ccc", cache.get("c")); + assertEquals("ddd", cache.get("d")); + + cache.get("b"); + cache.get("c"); + // Because of get() calls, LRU should be d + cache.put("a", "aaa"); + assertEquals(3, cache.size()); + assertEquals("aaa", cache.get("a")); + assertEquals("bbb", cache.get("b")); + assertEquals("ccc", cache.get("c")); + + cache.put("a", "aaa"); + // Because of put(), LRU should be b + cache.put("d", "ddd"); + assertEquals(3, cache.size()); + assertEquals("aaa", cache.get("a")); + assertEquals("ccc", cache.get("c")); + assertEquals("ddd", cache.get("d")); + + cache.remove("a"); + assertEquals(2, cache.size()); + cache.put("b", "bbb"); + assertEquals(3, cache.size()); + assertEquals("bbb", cache.get("b")); + assertEquals("ccc", cache.get("c")); + assertEquals("ddd", cache.get("d")); + } +} diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index cb63d59..1ed3ea0 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -23,12 +23,14 @@ import java.lang.reflect.Type; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.hive.common.LRUCache; import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -63,7 +65,9 @@ JAVA, THRIFT, PROTOCOL_BUFFERS, AVRO }; - static ConcurrentHashMap objectInspectorCache = new ConcurrentHashMap(); + static final int DEFAULT_CACHE_SIZE = 1000; + static Map objectInspectorCache = + Collections.synchronizedMap(new LRUCache(DEFAULT_CACHE_SIZE)); public static ObjectInspector getReflectionObjectInspector(Type t, ObjectInspectorOptions options) { @@ -230,8 +234,8 @@ private static ObjectInspector getReflectionObjectInspectorNoCache(Type t, } - static ConcurrentHashMap - cachedStandardListObjectInspector = new ConcurrentHashMap(); + static Map cachedStandardListObjectInspector = + Collections.synchronizedMap(new LRUCache(DEFAULT_CACHE_SIZE)); public static StandardListObjectInspector getStandardListObjectInspector( ObjectInspector listElementObjectInspector) { @@ -239,11 +243,7 @@ public static StandardListObjectInspector getStandardListObjectInspector( .get(listElementObjectInspector); if (result == null) { result = new StandardListObjectInspector(listElementObjectInspector); - StandardListObjectInspector prev = - cachedStandardListObjectInspector.putIfAbsent(listElementObjectInspector, result); - if (prev != null) { - result = prev; - } + result = addToCache(cachedStandardListObjectInspector, listElementObjectInspector, result); } return result; } @@ -253,8 +253,8 @@ public static StandardConstantListObjectInspector getStandardConstantListObjectI return new StandardConstantListObjectInspector(listElementObjectInspector, constantValue); } - static ConcurrentHashMap, StandardMapObjectInspector> cachedStandardMapObjectInspector = - new ConcurrentHashMap, StandardMapObjectInspector>(); + static Map, StandardMapObjectInspector> cachedStandardMapObjectInspector = + Collections.synchronizedMap(new LRUCache, StandardMapObjectInspector>(DEFAULT_CACHE_SIZE)); public static StandardMapObjectInspector getStandardMapObjectInspector( ObjectInspector mapKeyObjectInspector, @@ -267,11 +267,7 @@ public static StandardMapObjectInspector getStandardMapObjectInspector( if (result == null) { result = new StandardMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector); - StandardMapObjectInspector prev = - cachedStandardMapObjectInspector.putIfAbsent(signature, result); - if (prev != null) { - result = prev; - } + result = addToCache(cachedStandardMapObjectInspector, signature, result); } return result; } @@ -284,9 +280,8 @@ public static StandardConstantMapObjectInspector getStandardConstantMapObjectIns mapValueObjectInspector, constantValue); } - static ConcurrentHashMap, StandardUnionObjectInspector> - cachedStandardUnionObjectInspector = - new ConcurrentHashMap, StandardUnionObjectInspector>(); + static Map, StandardUnionObjectInspector> cachedStandardUnionObjectInspector = + Collections.synchronizedMap(new LRUCache, StandardUnionObjectInspector>(DEFAULT_CACHE_SIZE)); public static StandardUnionObjectInspector getStandardUnionObjectInspector( List unionObjectInspectors) { @@ -294,17 +289,13 @@ public static StandardUnionObjectInspector getStandardUnionObjectInspector( .get(unionObjectInspectors); if (result == null) { result = new StandardUnionObjectInspector(unionObjectInspectors); - StandardUnionObjectInspector prev = - cachedStandardUnionObjectInspector.putIfAbsent(unionObjectInspectors, result); - if (prev != null) { - result = prev; - } + result = addToCache(cachedStandardUnionObjectInspector, unionObjectInspectors, result); } return result; } - static ConcurrentHashMap>, StandardStructObjectInspector> cachedStandardStructObjectInspector = - new ConcurrentHashMap>, StandardStructObjectInspector>(); + static Map>, StandardStructObjectInspector> cachedStandardStructObjectInspector = + Collections.synchronizedMap(new LRUCache>, StandardStructObjectInspector>(DEFAULT_CACHE_SIZE)); public static StandardStructObjectInspector getStandardStructObjectInspector( List structFieldNames, @@ -327,11 +318,7 @@ public static StandardStructObjectInspector getStandardStructObjectInspector( StandardStructObjectInspector result = cachedStandardStructObjectInspector.get(signature); if (result == null) { result = new StandardStructObjectInspector(structFieldNames, structFieldObjectInspectors, structComments); - StandardStructObjectInspector prev = - cachedStandardStructObjectInspector.putIfAbsent(signature, result); - if (prev != null) { - result = prev; - } + result = addToCache(cachedStandardStructObjectInspector, signature, result); } return result; } @@ -342,8 +329,8 @@ public static StandardConstantStructObjectInspector getStandardConstantStructObj return new StandardConstantStructObjectInspector(structFieldNames, structFieldObjectInspectors, value); } - static ConcurrentHashMap, UnionStructObjectInspector> cachedUnionStructObjectInspector = - new ConcurrentHashMap, UnionStructObjectInspector>(); + static Map, UnionStructObjectInspector> cachedUnionStructObjectInspector = + Collections.synchronizedMap(new LRUCache, UnionStructObjectInspector>(DEFAULT_CACHE_SIZE)); public static UnionStructObjectInspector getUnionStructObjectInspector( List structObjectInspectors) { @@ -351,17 +338,13 @@ public static UnionStructObjectInspector getUnionStructObjectInspector( .get(structObjectInspectors); if (result == null) { result = new UnionStructObjectInspector(structObjectInspectors); - UnionStructObjectInspector prev = - cachedUnionStructObjectInspector.putIfAbsent(structObjectInspectors, result); - if (prev != null) { - result = prev; - } + result = addToCache(cachedUnionStructObjectInspector, structObjectInspectors, result); } return result; } - static ConcurrentHashMap, ColumnarStructObjectInspector> cachedColumnarStructObjectInspector = - new ConcurrentHashMap, ColumnarStructObjectInspector>(); + static Map, ColumnarStructObjectInspector> cachedColumnarStructObjectInspector = + Collections.synchronizedMap(new LRUCache, ColumnarStructObjectInspector>(DEFAULT_CACHE_SIZE)); public static ColumnarStructObjectInspector getColumnarStructObjectInspector( List structFieldNames, @@ -383,13 +366,72 @@ public static ColumnarStructObjectInspector getColumnarStructObjectInspector( if (result == null) { result = new ColumnarStructObjectInspector(structFieldNames, structFieldObjectInspectors, structFieldComments); - ColumnarStructObjectInspector prev = - cachedColumnarStructObjectInspector.putIfAbsent(signature, result); + result = addToCache(cachedColumnarStructObjectInspector, signature, result); + } + return result; + } + + static boolean hasConstantObjectInspector(ObjectInspector oi) { + if (oi instanceof ConstantObjectInspector) { + return true; + } + + // Recurse into nested types + switch (oi.getCategory()) { + case LIST: + ListObjectInspector listOI = (ListObjectInspector) oi; + return hasConstantObjectInspector(listOI.getListElementObjectInspector()); + case MAP: + MapObjectInspector mapOI = (MapObjectInspector) oi; + ObjectInspector keyOI = mapOI.getMapKeyObjectInspector(); + if (hasConstantObjectInspector(keyOI)) { + return true; + } + ObjectInspector valOI = mapOI.getMapValueObjectInspector(); + if (hasConstantObjectInspector(valOI)) { + return true; + } + break; + case STRUCT: + StructObjectInspector structOI = (StructObjectInspector) oi; + for (StructField structField : structOI.getAllStructFieldRefs()) { + if (hasConstantObjectInspector(structField.getFieldObjectInspector())) { + return true; + } + } + break; + case UNION: + UnionObjectInspector unionOI = (UnionObjectInspector) oi; + List tagOIs = unionOI.getObjectInspectors(); + for (ObjectInspector tagOI : tagOIs) { + if (hasConstantObjectInspector(tagOI)) { + return true; + } + } + break; + default: + break; + } + + return false; + } + + /** + * Adds the key, oi to the specified object inspector cache, if no previously cached item exists. + * @param key + * @param oi + * @param cache oi if it was added to the cache. If the key already had a cached item, then the + * existing item from the cache is returned. + * @return + */ + static V addToCache(Map cache, K key, V oi) { + if (!hasConstantObjectInspector(oi)) { + V prev = cache.putIfAbsent(key, oi); if (prev != null) { - result = prev; + oi = prev; } } - return result; + return oi; } private ObjectInspectorFactory() { diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorFactory.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorFactory.java new file mode 100644 index 0000000..140e8dc --- /dev/null +++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorFactory.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.objectinspector; + +import static org.junit.Assert.*; + +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.IntWritable; +import org.junit.Test; + +public class TestObjectInspectorFactory { + @Test + public void testHasConstantObjectInspector() throws Exception { + ObjectInspector oi; + + ObjectInspector nonConstOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + ObjectInspector constOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.intTypeInfo, new IntWritable(100)); + + // Primitive + assertFalse(ObjectInspectorFactory.hasConstantObjectInspector(nonConstOI)); + assertTrue(ObjectInspectorFactory.hasConstantObjectInspector(constOI)); + + // List + oi = ObjectInspectorFactory.getStandardListObjectInspector(nonConstOI); + assertFalse(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + oi = ObjectInspectorFactory.getStandardListObjectInspector(constOI); + assertTrue(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + // Map + oi = ObjectInspectorFactory.getStandardMapObjectInspector(nonConstOI, nonConstOI); + assertFalse(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + oi = ObjectInspectorFactory.getStandardMapObjectInspector(nonConstOI, constOI); + assertTrue(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + oi = ObjectInspectorFactory.getStandardMapObjectInspector(constOI, nonConstOI); + assertTrue(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + + // Struct + List fieldNames = Arrays.asList("a", "b"); + List fieldOIs = Arrays.asList(nonConstOI, nonConstOI); + oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + assertFalse(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + fieldOIs = Arrays.asList( + nonConstOI, + ObjectInspectorFactory.getStandardMapObjectInspector(nonConstOI, nonConstOI)); + oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + assertFalse(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + fieldOIs = Arrays.asList( + constOI, + ObjectInspectorFactory.getStandardMapObjectInspector(nonConstOI, nonConstOI)); + oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + assertTrue(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + + fieldOIs = Arrays.asList( + nonConstOI, + ObjectInspectorFactory.getStandardMapObjectInspector(nonConstOI, constOI)); + oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + assertTrue(ObjectInspectorFactory.hasConstantObjectInspector(oi)); + } + + @Test + public void testObjectInspectorFactory() { + ObjectInspector nonConstOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector; + ObjectInspector constOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( + TypeInfoFactory.intTypeInfo, new IntWritable(100)); + + ObjectInspectorFactory.cachedStandardStructObjectInspector.clear(); + assertEquals(0, ObjectInspectorFactory.cachedStandardStructObjectInspector.size()); + + List fieldNames = Arrays.asList("a", "b"); + List fieldOIs = Arrays.asList(constOI, nonConstOI); + ObjectInspector oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + // Struct ObjInspector with const value should not be cached + assertEquals(0, ObjectInspectorFactory.cachedStandardStructObjectInspector.size()); + + fieldOIs = Arrays.asList(nonConstOI, nonConstOI); + oi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + // No const obj inspectors - should be ok to cache. + assertEquals(1, ObjectInspectorFactory.cachedStandardStructObjectInspector.size()); + } +}