diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java index 2f7fa24558..30dc1b9da2 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java @@ -24,12 +24,12 @@ import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.io.DataCache.BooleanRef; import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.cache.EvictionDispatcher; import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; import org.apache.hadoop.hive.ql.io.SyntheticFileId; import org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; public class OrcFileEstimateErrors extends LlapCacheableBuffer { private final Object fileKey; diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java index 0bbaf7e459..8a5bb5a1a0 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java @@ -34,14 +34,14 @@ import org.apache.orc.TypeDescription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata; import org.apache.orc.impl.OrcIndex; import org.apache.orc.StripeInformation; import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.orc.OrcProto; import org.junit.Test; import org.mockito.Mockito; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java index add8bda8ee..ad80ff1dca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java @@ -25,11 +25,11 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.MemoryEstimate; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.debug.Utils; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java index 18bacc5dc2..856066236c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/aggregates/VectorUDAFBloomFilter.java @@ -40,8 +40,8 @@ import org.apache.hadoop.hive.ql.plan.AggregationDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBloomFilter.GenericUDAFBloomFilterEvaluator; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.IOUtils; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f2b9..1719cf1c4e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -20,11 +20,11 @@ import java.io.IOException; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index cbcc9b1ba5..f269506ad3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; +import org.apache.hadoop.hive.ql.util.JavaDataModel; public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index f42430dbf8..cbcb57a06b 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -21,7 +21,6 @@ 
import java.io.IOException; import org.apache.hadoop.hive.common.MemoryEstimate; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; @@ -29,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e808ef..a2c9490b70 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -20,7 +20,6 @@ import java.io.IOException; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; @@ -29,6 +28,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashTable; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 24dfa5d9a7..3651c1d35c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -19,7 +19,6 @@ import java.io.IOException; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -35,6 +34,7 @@ import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java index 74887f7ecd..e3ef9c42a6 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java @@ -20,7 +20,6 @@ import java.io.IOException; -import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.apache.hadoop.hive.ql.exec.JoinUtil; @@ -32,6 +31,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index c47856de87..7a81d5c0e5 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -18,12 +18,10 @@ package org.apache.hadoop.hive.metastore.cache; -import java.io.Closeable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -33,8 +31,6 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -100,7 +96,6 @@ import org.apache.hadoop.hive.metastore.api.SchemaVersion; import org.apache.hadoop.hive.metastore.api.SchemaVersionDescriptor; import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.TableMeta; import org.apache.hadoop.hive.metastore.api.Type; @@ -161,9 +156,18 @@ public CachedStore() { public void setConf(Configuration conf) { setConfInternal(conf); initBlackListWhiteList(conf); + initSharedCache(conf); startCacheUpdateService(conf, false, true); } + private void initSharedCache(Configuration conf) { + long maxSharedCacheSizeInBytes = + MetastoreConf.getLongVar(conf, ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY); + sharedCache.initialize(maxSharedCacheSizeInBytes); + LOG.info("Maximum memory that the cache will use: {} GB", + maxSharedCacheSizeInBytes / (1024 * 1024 * 1024)); + } + /** * Similar to setConf but used from within the tests * This does start the background thread for prewarm and update @@ -324,8 +328,21 @@ static void prewarm(RawStore rawStore) { tableColStats = rawStore.getTableColumnStatistics(catName, dbName, tblName, colNames); Deadline.stopTimer(); } - sharedCache.populateTableInCache(table, tableColStats, partitions, partitionColStats, + // If the table could not be cached due to memory limit, stop prewarm + boolean isSuccess = sharedCache.populateTableInCache(table, tableColStats, partitions, partitionColStats, aggrStatsAllPartitions, aggrStatsAllButDefaultPartition); + if (!isSuccess) { + LOG.info( + "Unable to cache database: {}'s table: {}, since the cache memory is full. 
" + + "Will stop attempting to cache any more tables.", + dbName, tblName); + isCachePrewarmed.set(true); + LOG.info("CachedStore initialized"); + long endTime = System.nanoTime(); + LOG.info("Time taken in prewarming = " + (endTime - startTime) / 1000000 + "ms"); + sharedCache.completeTableCachePrewarm(); + return; + } } catch (MetaException | NoSuchObjectException e) { // Continue with next table continue; diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java index 89b400697b..2a33b3d7cb 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java @@ -33,7 +33,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.TreeMap; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -50,6 +49,8 @@ import org.apache.hadoop.hive.metastore.api.TableMeta; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.StringUtils; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,10 +77,14 @@ private HashSet tablesDeletedDuringPrewarm = new HashSet<>(); private AtomicBoolean isTableCacheDirty = new AtomicBoolean(false); private Map sdCache = new HashMap<>(); - private Configuration conf; private static MessageDigest md; static final private Logger LOG = LoggerFactory.getLogger(SharedCache.class.getName()); private AtomicLong cacheUpdateCount = new AtomicLong(0); + private static long maxCacheSizeInBytes; + private static long currentCacheSizeInBytes = 0; + private static HashMap, ObjectEstimator> sizeEstimators = null; + private static ObjectEstimator tblWrapperSizeEstimator = null; + private static ObjectEstimator ptnWrapperSizeEstimator = null; enum StatsType { ALL(0), ALLBUTDEFAULT(1); @@ -103,6 +108,28 @@ public int getPosition() { } } + + public void initialize(long maxSharedCacheSizeInBytes) { + maxCacheSizeInBytes = maxSharedCacheSizeInBytes; + if (sizeEstimators == null) { + sizeEstimators = IncrementalObjectSizeEstimator.createEstimators(SharedCache.class); + } + if (tblWrapperSizeEstimator == null) { + tblWrapperSizeEstimator = sizeEstimators.get(TableWrapper.class); + } + if (tblWrapperSizeEstimator == null) { + IncrementalObjectSizeEstimator.createEstimators(TableWrapper.class, sizeEstimators); + tblWrapperSizeEstimator = sizeEstimators.get(TableWrapper.class); + } + if (ptnWrapperSizeEstimator == null) { + ptnWrapperSizeEstimator = sizeEstimators.get(PartitionWrapper.class); + } + if (ptnWrapperSizeEstimator == null) { + IncrementalObjectSizeEstimator.createEstimators(PartitionWrapper.class, sizeEstimators); + ptnWrapperSizeEstimator = sizeEstimators.get(PartitionWrapper.class); + } + } + static class TableWrapper { Table t; String location; @@ -116,7 +143,8 @@ public int getPosition() { private AtomicBoolean isTableColStatsCacheDirty = new AtomicBoolean(false); // For caching partition objects // Ket is partition values and the value is a wrapper around the partition object - private Map partitionCache = new 
ConcurrentHashMap(); + private Map partitionCache = + new ConcurrentHashMap(); private AtomicBoolean isPartitionCacheDirty = new AtomicBoolean(false); // For caching column stats for a partitioned table // Key is aggregate of partition values, column name and the value is the col stat object @@ -188,11 +216,17 @@ void cachePartition(Partition part, SharedCache sharedCache) { } } - void cachePartitions(List parts, SharedCache sharedCache) { + boolean cachePartitions(List parts, SharedCache sharedCache) { try { tableLock.writeLock().lock(); for (Partition part : parts) { PartitionWrapper wrapper = makePartitionWrapper(part, sharedCache); + long estimatedMemUsage = ptnWrapperSizeEstimator.estimate(wrapper, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } partitionCache.put(CacheUtils.buildPartitionCacheKey(part.getValues()), wrapper); isPartitionCacheDirty.set(true); } @@ -200,6 +234,7 @@ void cachePartitions(List parts, SharedCache sharedCache) { if (!aggrColStatsCache.isEmpty()) { aggrColStatsCache.clear(); } + return true; } finally { tableLock.writeLock().unlock(); } @@ -252,7 +287,8 @@ public Partition removePartition(List partVal, SharedCache sharedCache) Partition part = null; try { tableLock.writeLock().lock(); - PartitionWrapper wrapper = partitionCache.remove(CacheUtils.buildPartitionCacheKey(partVal)); + PartitionWrapper wrapper = + partitionCache.remove(CacheUtils.buildPartitionCacheKey(partVal)); isPartitionCacheDirty.set(true); if (wrapper.getSdHash() != null) { sharedCache.decrSd(wrapper.getSdHash()); @@ -412,7 +448,8 @@ public void removeTableColStats(String colName) { public ColumnStatisticsObj getPartitionColStats(List partVal, String colName) { try { tableLock.readLock().lock(); - return partitionColStatsCache.get(CacheUtils.buildPartitonColStatsCacheKey(partVal, colName)); + return partitionColStatsCache + .get(CacheUtils.buildPartitonColStatsCacheKey(partVal, colName)); } finally { tableLock.readLock().unlock(); } @@ -486,7 +523,8 @@ public void refreshPartitionColStats(List partitionColStats) { + getTable().getTableName() + "; the partition column list we have is dirty"); return; } - String key = CacheUtils.buildPartitonColStatsCacheKey(partVal, colStatObj.getColName()); + String key = + CacheUtils.buildPartitonColStatsCacheKey(partVal, colStatObj.getColName()); newPartitionColStatsCache.put(key, colStatObj.deepCopy()); } } catch (MetaException e) { @@ -801,7 +839,8 @@ public void populateDatabasesInCache(List databases) { // 1. Don't add databases that were deleted while we were preparing list for prewarm // 2. Skip overwriting exisiting db object // (which is present because it was added after prewarm started) - String key = CacheUtils.buildDbKey(dbCopy.getCatalogName().toLowerCase(), dbCopy.getName().toLowerCase()); + String key = CacheUtils.buildDbKey(dbCopy.getCatalogName().toLowerCase(), + dbCopy.getName().toLowerCase()); if (databasesDeletedDuringPrewarm.contains(key)) { continue; } @@ -855,7 +894,8 @@ public void removeDatabaseFromCache(String catName, String dbName) { cacheLock.readLock().lock(); for (String pair : databaseCache.keySet()) { String[] n = CacheUtils.splitDbName(pair); - if (catName.equals(n[0])) results.add(n[1]); + if (catName.equals(n[0])) + results.add(n[1]); } } finally { cacheLock.readLock().unlock(); @@ -883,8 +923,8 @@ public void removeDatabaseFromCache(String catName, String dbName) { } /** - * Replaces the old db object with the new one. 
- * This will add the new database to cache if it does not exist. + * Replaces the old db object with the new one. This will add the new database to cache if it does + * not exist. */ public void alterDatabaseInCache(String catName, String dbName, Database newDb) { try { @@ -922,7 +962,7 @@ public int getCachedDatabaseCount() { } } - public void populateTableInCache(Table table, ColumnStatistics tableColStats, + public boolean populateTableInCache(Table table, ColumnStatistics tableColStats, List partitions, List partitionColStats, AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { String catName = StringUtils.normalizeIdentifier(table.getCatName()); @@ -931,14 +971,24 @@ public void populateTableInCache(Table table, ColumnStatistics tableColStats, // Since we allow write operations on cache while prewarm is happening: // 1. Don't add tables that were deleted while we were preparing list for prewarm if (tablesDeletedDuringPrewarm.contains(CacheUtils.buildTableKey(catName, dbName, tableName))) { - return; + return false; } TableWrapper tblWrapper = createTableWrapper(catName, dbName, tableName, table); + long estimatedMemUsage = tblWrapperSizeEstimator.estimate(tblWrapper, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } if (!table.isSetPartitionKeys() && (tableColStats != null)) { tblWrapper.updateTableColStats(tableColStats.getStatsObj()); } else { if (partitions != null) { - tblWrapper.cachePartitions(partitions, this); + // If the partitions were not added due to memory limit, return false + boolean isSuccess = tblWrapper.cachePartitions(partitions, this); + if (!isSuccess) { + return isSuccess; + } } if (partitionColStats != null) { for (ColumnStatistics cs : partitionColStats) { @@ -960,12 +1010,15 @@ public void populateTableInCache(Table table, ColumnStatistics tableColStats, // 2. 
Skip overwriting exisiting table object // (which is present because it was added after prewarm started) tableCache.putIfAbsent(CacheUtils.buildTableKey(catName, dbName, tableName), tblWrapper); + return true; } finally { cacheLock.writeLock().unlock(); } } - + private static boolean isCacheMemoryFull(long estimatedMemUsage) { + return (currentCacheSizeInBytes + estimatedMemUsage) > (0.8 * maxCacheSizeInBytes); + } public void completeTableCachePrewarm() { try { @@ -981,7 +1034,8 @@ public Table getTableFromCache(String catName, String dbName, String tableName) Table t = null; try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { t = CacheUtils.assemble(tblWrapper, this); } @@ -1003,7 +1057,8 @@ public TableWrapper addTableToCache(String catName, String dbName, String tblNam } } - private TableWrapper createTableWrapper(String catName, String dbName, String tblName, Table tbl) { + private TableWrapper createTableWrapper(String catName, String dbName, String tblName, + Table tbl) { TableWrapper wrapper; Table tblCopy = tbl.deepCopy(); tblCopy.setCatName(normalizeIdentifier(catName)); @@ -1026,7 +1081,6 @@ private TableWrapper createTableWrapper(String catName, String dbName, String tb return wrapper; } - public void removeTableFromCache(String catName, String dbName, String tblName) { try { cacheLock.writeLock().lock(); @@ -1035,7 +1089,8 @@ public void removeTableFromCache(String catName, String dbName, String tblName) if (!isTableCachePrewarmed) { tablesDeletedDuringPrewarm.add(CacheUtils.buildTableKey(catName, dbName, tblName)); } - TableWrapper tblWrapper = tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); + TableWrapper tblWrapper = + tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); byte[] sdHash = tblWrapper.getSdHash(); if (sdHash != null) { decrSd(sdHash); @@ -1049,7 +1104,8 @@ public void removeTableFromCache(String catName, String dbName, String tblName) public void alterTableInCache(String catName, String dbName, String tblName, Table newTable) { try { cacheLock.writeLock().lock(); - TableWrapper tblWrapper = tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); + TableWrapper tblWrapper = + tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); if (tblWrapper != null) { tblWrapper.updateTableObj(newTable, this); String newDbName = StringUtils.normalizeIdentifier(newTable.getDbName()); @@ -1092,7 +1148,8 @@ public void alterTableInCache(String catName, String dbName, String tblName, Tab return tableNames; } - public List listCachedTableNames(String catName, String dbName, String pattern, short maxTables) { + public List listCachedTableNames(String catName, String dbName, String pattern, + short maxTables) { List tableNames = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1111,7 +1168,8 @@ public void alterTableInCache(String catName, String dbName, String tblName, Tab return tableNames; } - public List listCachedTableNames(String catName, String dbName, String pattern, TableType tableType) { + public List listCachedTableNames(String catName, String dbName, String pattern, + TableType tableType) { List tableNames = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1138,7 +1196,8 @@ public void refreshTablesInCache(String catName, String dbName, List tabl Map newTableCache = new 
HashMap<>(); for (Table tbl : tables) { String tblName = StringUtils.normalizeIdentifier(tbl.getTableName()); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); if (tblWrapper != null) { tblWrapper.updateTableObj(tbl, this); } else { @@ -1153,8 +1212,8 @@ public void refreshTablesInCache(String catName, String dbName, List
tabl } } - public List getTableColStatsFromCache( - String catName, String dbName, String tblName, List colNames) { + public List getTableColStatsFromCache(String catName, String dbName, + String tblName, List colNames) { List colStatObjs = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1168,7 +1227,8 @@ public void refreshTablesInCache(String catName, String dbName, List
tabl return colStatObjs; } - public void removeTableColStatsFromCache(String catName, String dbName, String tblName, String colName) { + public void removeTableColStatsFromCache(String catName, String dbName, String tblName, + String colName) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1181,10 +1241,11 @@ public void removeTableColStatsFromCache(String catName, String dbName, String t } public void updateTableColStatsInCache(String catName, String dbName, String tableName, - List colStatsForTable) { + List colStatsForTable) { try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { tblWrapper.updateTableColStats(colStatsForTable); } @@ -1194,10 +1255,11 @@ public void updateTableColStatsInCache(String catName, String dbName, String tab } public void refreshTableColStatsInCache(String catName, String dbName, String tableName, - List colStatsForTable) { + List colStatsForTable) { try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { tblWrapper.refreshTableColStats(colStatsForTable); } @@ -1216,7 +1278,7 @@ public int getCachedTableCount() { } public List getTableMeta(String catName, String dbNames, String tableNames, - List tableTypes) { + List tableTypes) { List tableMetas = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1253,7 +1315,8 @@ public void addPartitionToCache(String catName, String dbName, String tblName, P } } - public void addPartitionsToCache(String catName, String dbName, String tblName, List parts) { + public void addPartitionsToCache(String catName, String dbName, String tblName, + List parts) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1266,7 +1329,7 @@ public void addPartitionsToCache(String catName, String dbName, String tblName, } public Partition getPartitionFromCache(String catName, String dbName, String tblName, - List partVals) { + List partVals) { Partition part = null; try { cacheLock.readLock().lock(); @@ -1280,7 +1343,8 @@ public Partition getPartitionFromCache(String catName, String dbName, String tbl return part; } - public boolean existPartitionFromCache(String catName, String dbName, String tblName, List partVals) { + public boolean existPartitionFromCache(String catName, String dbName, String tblName, + List partVals) { boolean existsPart = false; try { cacheLock.readLock().lock(); @@ -1295,7 +1359,7 @@ public boolean existPartitionFromCache(String catName, String dbName, String tbl } public Partition removePartitionFromCache(String catName, String dbName, String tblName, - List partVals) { + List partVals) { Partition part = null; try { cacheLock.readLock().lock(); @@ -1310,7 +1374,7 @@ public Partition removePartitionFromCache(String catName, String dbName, String } public void removePartitionsFromCache(String catName, String dbName, String tblName, - List> partVals) { + List> partVals) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1322,7 +1386,8 @@ public void 
removePartitionsFromCache(String catName, String dbName, String tblN } } - public List listCachedPartitions(String catName, String dbName, String tblName, int max) { + public List listCachedPartitions(String catName, String dbName, String tblName, + int max) { List parts = new ArrayList(); try { cacheLock.readLock().lock(); @@ -1336,8 +1401,8 @@ public void removePartitionsFromCache(String catName, String dbName, String tblN return parts; } - public void alterPartitionInCache(String catName, String dbName, String tblName, List partVals, - Partition newPart) { + public void alterPartitionInCache(String catName, String dbName, String tblName, + List partVals, Partition newPart) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1349,8 +1414,8 @@ public void alterPartitionInCache(String catName, String dbName, String tblName, } } - public void alterPartitionsInCache(String catName, String dbName, String tblName, List> partValsList, - List newParts) { + public void alterPartitionsInCache(String catName, String dbName, String tblName, + List> partValsList, List newParts) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1362,7 +1427,8 @@ public void alterPartitionsInCache(String catName, String dbName, String tblName } } - public void refreshPartitionsInCache(String catName, String dbName, String tblName, List partitions) { + public void refreshPartitionsInCache(String catName, String dbName, String tblName, + List partitions) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1375,7 +1441,7 @@ public void refreshPartitionsInCache(String catName, String dbName, String tblNa } public void removePartitionColStatsFromCache(String catName, String dbName, String tblName, - List partVals, String colName) { + List partVals, String colName) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1388,10 +1454,11 @@ public void removePartitionColStatsFromCache(String catName, String dbName, Stri } public void updatePartitionColStatsInCache(String catName, String dbName, String tableName, - List partVals, List colStatsObjs) { + List partVals, List colStatsObjs) { try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { tblWrapper.updatePartitionColStats(partVals, colStatsObjs); } @@ -1400,13 +1467,13 @@ public void updatePartitionColStatsInCache(String catName, String dbName, String } } - public ColumnStatisticsObj getPartitionColStatsFromCache(String catName, String dbName, String tblName, - List partVal, String colName) { + public ColumnStatisticsObj getPartitionColStatsFromCache(String catName, String dbName, + String tblName, List partVal, String colName) { ColumnStatisticsObj colStatObj = null; try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); - if (tblWrapper != null){ + if (tblWrapper != null) { colStatObj = tblWrapper.getPartitionColStats(partVal, colName); } } finally { @@ -1416,7 +1483,7 @@ public ColumnStatisticsObj getPartitionColStatsFromCache(String catName, String } public void 
refreshPartitionColStatsInCache(String catName, String dbName, String tblName, - List partitionColStats) { + List partitionColStats) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1428,8 +1495,8 @@ public void refreshPartitionColStatsInCache(String catName, String dbName, Strin } } - public List getAggrStatsFromCache(String catName, String dbName, String tblName, - List colNames, StatsType statsType) { + public List getAggrStatsFromCache(String catName, String dbName, + String tblName, List colNames, StatsType statsType) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1443,11 +1510,11 @@ public void refreshPartitionColStatsInCache(String catName, String dbName, Strin } public void addAggregateStatsToCache(String catName, String dbName, String tblName, - AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { + AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); - if (tblWrapper != null){ + if (tblWrapper != null) { tblWrapper.cacheAggrPartitionColStats(aggrStatsAllPartitions, aggrStatsAllButDefaultPartition); } @@ -1457,7 +1524,7 @@ public void addAggregateStatsToCache(String catName, String dbName, String tblNa } public void refreshAggregateStatsInCache(String catName, String dbName, String tblName, - AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { + AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1529,8 +1596,3 @@ public void incrementUpdateCount() { cacheUpdateCount.incrementAndGet(); } } - - - - - diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 995137f967..1424117344 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -303,6 +303,9 @@ public static ConfVars getMetaConf(String name) { "This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.whitelist. \n" + "Example: db2.*, db3\\.tbl1, db3\\..*. The last item can potentially override patterns specified before. 
\n" + "The blacklist also overrides the whitelist."), + CACHED_RAW_STORE_MAX_CACHE_MEMORY("metastore.cached.rawstore.max.cache.memory", + "hive.metastore.cached.rawstore.max.cache.memory", 1073741824, + "The maximum memory in bytes that the cached objects can use."), CAPABILITY_CHECK("metastore.client.capability.check", "hive.metastore.client.capability.check", true, "Whether to check client capabilities for potentially breaking API usage."), diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/IncrementalObjectSizeEstimator.java b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java similarity index 72% rename from llap-server/src/java/org/apache/hadoop/hive/llap/IncrementalObjectSizeEstimator.java rename to storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java index 6f4ec6f1ea..100124a868 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/IncrementalObjectSizeEstimator.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java @@ -15,10 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.llap; +package org.apache.hadoop.hive.ql.util; -import com.google.common.collect.Lists; -import com.google.protobuf.UnknownFieldSet; import java.lang.reflect.AccessibleObject; import java.lang.reflect.Array; import java.lang.reflect.Field; @@ -37,19 +35,23 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; -import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; -import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; -import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Creates size estimators for java objects. The estimators attempt to do most of the reflection * work at initialization time, and also take some shortcuts, to minimize the amount of work done - * during the actual estimation. TODO: clean up + * during the actual estimation. + * TODO: clean up */ public class IncrementalObjectSizeEstimator { public static final JavaDataModel memoryModel = JavaDataModel.get(); - private enum FieldType { PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER }; + static final private Logger LOG = + LoggerFactory.getLogger(IncrementalObjectSizeEstimator.class.getName()); + + private enum FieldType { + PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER + }; public static HashMap, ObjectEstimator> createEstimators(Object rootObj) { HashMap, ObjectEstimator> byType = new HashMap<>(); @@ -67,7 +69,7 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti Object obj = stack.pop(); Class clazz; if (obj instanceof Class) { - clazz = (Class)obj; + clazz = (Class) obj; obj = null; } else { clazz = obj.getClass(); @@ -78,12 +80,15 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti // The object was added later for the same class; see addToProcessing. 
continue; } - if (estimator.isProcessed()) continue; + if (estimator.isProcessed()) + continue; estimator.init(); - for (Field field : getAllFields(clazz, LlapCacheableBuffer.class)) { + for (Field field : getAllFields(clazz)) { Class fieldClass = field.getType(); - if (Modifier.isStatic(field.getModifiers())) continue; - if (Class.class.isAssignableFrom(fieldClass)) continue; + if (Modifier.isStatic(field.getModifiers())) + continue; + if (Class.class.isAssignableFrom(fieldClass)) + continue; if (fieldClass.isPrimitive()) { estimator.addPrimitive(fieldClass); continue; @@ -116,8 +121,8 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti addToProcessing(byType, stack, fieldObj, fieldClass); } } - estimator.directSize = (int) JavaDataModel.alignUp( - estimator.directSize, memoryModel.memoryAlign()); + estimator.directSize = + (int) JavaDataModel.alignUp(estimator.directSize, memoryModel.memoryAlign()); } } @@ -126,7 +131,7 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti Deque stack = new ArrayDeque(32); Class rootClass = rootObj.getClass(); if (Class.class.equals(rootClass)) { - rootClass = (Class)rootObj; + rootClass = (Class) rootObj; rootObj = null; } else { // If root object is an array, map or collection, add estimators as for fields @@ -142,22 +147,16 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti return stack; } - private static void addHardcodedEstimators( - HashMap, ObjectEstimator> byType) { + private static void addHardcodedEstimators(HashMap, ObjectEstimator> byType) { // Add hacks for well-known collections and maps to avoid estimating them. - byType.put(ArrayList.class, new CollectionEstimator( - memoryModel.arrayList(), memoryModel.ref())); - byType.put(LinkedList.class, new CollectionEstimator( - memoryModel.linkedListBase(), memoryModel.linkedListEntry())); - byType.put(HashSet.class, new CollectionEstimator( - memoryModel.hashSetBase(), memoryModel.hashSetEntry())); - byType.put(HashMap.class, new CollectionEstimator( - memoryModel.hashMapBase(), memoryModel.hashMapEntry())); - // Add a hack for UnknownFieldSet because we assume it will never have anything (TODO: clear?) 
- ObjectEstimator ufsEstimator = new ObjectEstimator(false); - ufsEstimator.directSize = memoryModel.object() * 2 + memoryModel.ref(); - byType.put(UnknownFieldSet.class, ufsEstimator); - // TODO: 1-field hack for UnmodifiableCollection for protobuf too + byType.put(ArrayList.class, + new CollectionEstimator(memoryModel.arrayList(), memoryModel.ref())); + byType.put(LinkedList.class, + new CollectionEstimator(memoryModel.linkedListBase(), memoryModel.linkedListEntry())); + byType.put(HashSet.class, + new CollectionEstimator(memoryModel.hashSetBase(), memoryModel.hashSetEntry())); + byType.put(HashMap.class, + new CollectionEstimator(memoryModel.hashMapBase(), memoryModel.hashMapEntry())); } private static Object extractFieldObj(Object obj, Field field) { @@ -170,7 +169,8 @@ private static Object extractFieldObj(Object obj, Field field) { private static Class determineRealClass(HashMap, ObjectEstimator> byType, Deque stack, Field field, Class fieldClass, Object fieldObj) { - if (fieldObj == null) return fieldClass; + if (fieldObj == null) + return fieldClass; Class realFieldClass = fieldObj.getClass(); if (!fieldClass.equals(realFieldClass)) { addToProcessing(byType, stack, null, fieldClass); @@ -183,10 +183,10 @@ private static void addCollectionEstimator(HashMap, ObjectEstimator> by Deque stack, Field field, Class fieldClass, Object fieldObj) { Collection fieldCol = null; if (fieldObj != null) { - fieldCol = (Collection)fieldObj; + fieldCol = (Collection) fieldObj; if (fieldCol.size() == 0) { fieldCol = null; - LlapIoImpl.LOG.trace("Empty collection {}", field); + LOG.trace("Empty collection {}", field); } } if (fieldCol != null) { @@ -202,8 +202,8 @@ private static void addCollectionEstimator(HashMap, ObjectEstimator> by addToProcessing(byType, stack, null, collectionArg); } // TODO: there was code here to create guess-estimate for collection wrt how usage changes - // when removing elements. However it's too error-prone for anything involving - // pre-allocated capacity, so it was discarded. + // when removing elements. However it's too error-prone for anything involving + // pre-allocated capacity, so it was discarded. // We will estimate collection as an object (only if it's a field). addToProcessing(byType, stack, fieldObj, fieldClass); @@ -214,10 +214,10 @@ private static void addMapEstimator(HashMap, ObjectEstimator> byType, Deque stack, Field field, Class fieldClass, Object fieldObj) { Map fieldCol = null; if (fieldObj != null) { - fieldCol = (Map)fieldObj; + fieldCol = (Map) fieldObj; if (fieldCol.size() == 0) { fieldCol = null; - LlapIoImpl.LOG.trace("Empty map {}", field); + LOG.trace("Empty map {}", field); } } if (fieldCol != null) { @@ -248,16 +248,16 @@ private static void addMapEstimator(HashMap, ObjectEstimator> byType, // TODO: this makes many assumptions, e.g. on how generic args are done Type genericType = field.getGenericType(); if (genericType instanceof ParameterizedType) { - Type[] types = ((ParameterizedType)genericType).getActualTypeArguments(); + Type[] types = ((ParameterizedType) genericType).getActualTypeArguments(); if (types.length == 2 && types[0] instanceof Class && types[1] instanceof Class) { - return new Class[] { (Class)types[0], (Class)types[1] }; + return new Class[] { (Class) types[0], (Class) types[1] }; } else { // TODO: we could try to get the declaring object and infer argument... stupid Java. 
- LlapIoImpl.LOG.trace("Cannot determine map type: {}", field); + LOG.trace("Cannot determine map type: {}", field); } } else { // TODO: we could try to get superclass or generic interfaces. - LlapIoImpl.LOG.trace("Non-parametrized map type: {}", field); + LOG.trace("Non-parametrized map type: {}", field); } return null; } @@ -266,26 +266,26 @@ private static void addMapEstimator(HashMap, ObjectEstimator> byType, // TODO: this makes many assumptions, e.g. on how generic args are done Type genericType = field.getGenericType(); if (genericType instanceof ParameterizedType) { - Type type = ((ParameterizedType)genericType).getActualTypeArguments()[0]; + Type type = ((ParameterizedType) genericType).getActualTypeArguments()[0]; if (type instanceof Class) { - return (Class)type; + return (Class) type; } else { // TODO: we could try to get the declaring object and infer argument... stupid Java. - LlapIoImpl.LOG.trace("Cannot determine collection type: {}", field); + LOG.trace("Cannot determine collection type: {}", field); } } else { // TODO: we could try to get superclass or generic interfaces. - LlapIoImpl.LOG.trace("Non-parametrized collection type: {}", field); + LOG.trace("Non-parametrized collection type: {}", field); } return null; } - private static void addArrayEstimator( - HashMap, ObjectEstimator> byType, Deque stack, - Field field, Object fieldObj) { - if (fieldObj == null) return; + private static void addArrayEstimator(HashMap, ObjectEstimator> byType, + Deque stack, Field field, Object fieldObj) { + if (fieldObj == null) + return; int arrayLen = Array.getLength(fieldObj); - LlapIoImpl.LOG.trace("Empty array {}", field); + LOG.trace("Empty array {}", field); for (int i = 0; i < arrayLen; ++i) { Object element = Array.get(fieldObj, i); if (element != null) { @@ -299,9 +299,11 @@ private static void addArrayEstimator( private static void addToProcessing(HashMap, ObjectEstimator> byType, Deque stack, Object element, Class elementClass) { ObjectEstimator existing = byType.get(elementClass); - if (existing != null && (!existing.isFromClass || (element == null))) return; + if (existing != null && (!existing.isFromClass || (element == null))) + return; if (elementClass.isInterface()) { - if (element == null) return; + if (element == null) + return; elementClass = element.getClass(); } byType.put(elementClass, new ObjectEstimator(element == null)); @@ -309,22 +311,25 @@ private static void addToProcessing(HashMap, ObjectEstimator> byType, } private static int getPrimitiveSize(Class fieldClass) { - if (fieldClass == long.class || fieldClass == double.class) return 8; - if (fieldClass == int.class || fieldClass == float.class) return 4; - if (fieldClass == short.class || fieldClass == char.class) return 2; - if (fieldClass == byte.class || fieldClass == boolean.class) return 1; - throw new AssertionError("Unrecognized primitive " + fieldClass.getName()); + if (fieldClass == long.class || fieldClass == double.class) + return 8; + if (fieldClass == int.class || fieldClass == float.class) + return 4; + if (fieldClass == short.class || fieldClass == char.class) + return 2; + if (fieldClass == byte.class || fieldClass == boolean.class) + return 1; + throw new AssertionError("Unrecognized primitive " + fieldClass.getName()); } - private static Iterable getAllFields(Class clazz, Class topClass) { - List fields = Lists.newArrayListWithCapacity(8); + private static Iterable getAllFields(Class clazz) { + List fields = new ArrayList<>(8); while (clazz != null) { 
fields.addAll(Arrays.asList(clazz.getDeclaredFields())); clazz = clazz.getSuperclass(); - if (clazz == topClass) break; } - //all together so there is only one security check + // all together so there is only one security check AccessibleObject.setAccessible(fields.toArray(new AccessibleObject[fields.size()]), true); return fields; } @@ -334,6 +339,7 @@ public FieldAndType(Field field, FieldType type) { this.field = field; this.type = type; } + public Field field; public FieldType type; } @@ -372,8 +378,7 @@ private void addField(FieldType type, Field field) { fields.add(new FieldAndType(field, type)); } - public int estimate( - Object obj, HashMap, ObjectEstimator> parent) { + public int estimate(Object obj, HashMap, ObjectEstimator> parent) { IdentityHashMap uniqueObjects = new IdentityHashMap<>(); uniqueObjects.put(obj, Boolean.TRUE); return estimate(obj, parent, uniqueObjects); @@ -382,7 +387,8 @@ public int estimate( protected int estimate(Object obj, HashMap, ObjectEstimator> parent, IdentityHashMap uniqueObjects) { // TODO: maybe use stack of est+obj pairs instead of recursion. - if (fields == null) return directSize; + if (fields == null) + return directSize; int referencedSize = 0; for (FieldAndType e : fields) { Object fieldObj; @@ -392,15 +398,17 @@ protected int estimate(Object obj, HashMap, ObjectEstimator> parent, throw new AssertionError("IAE: " + ex.getMessage()); } // reference is already accounted for in the directSize. - if (fieldObj == null) continue; - if (null != uniqueObjects.put(fieldObj, Boolean.TRUE)) continue; + if (fieldObj == null) + continue; + if (null != uniqueObjects.put(fieldObj, Boolean.TRUE)) + continue; switch (e.type) { case COLLECTION: { - Collection c = (Collection)fieldObj; + Collection c = (Collection) fieldObj; ObjectEstimator collEstimator = parent.get(fieldObj.getClass()); if (collEstimator == null) { // We have no estimator for this type... assume low overhead and hope for the best. - LlapIoImpl.LOG.trace("Approximate estimation for collection {} from {}", e.field, + LOG.trace("Approximate estimation for collection {} from {}", e.field, fieldObj.getClass().getName()); referencedSize += memoryModel.object(); referencedSize += estimateCollectionElements(parent, c, e.field, uniqueObjects); @@ -408,43 +416,44 @@ protected int estimate(Object obj, HashMap, ObjectEstimator> parent, } else if (collEstimator instanceof CollectionEstimator) { referencedSize += memoryModel.object(); referencedSize += estimateCollectionElements(parent, c, e.field, uniqueObjects); - referencedSize += ((CollectionEstimator)collEstimator).estimateOverhead(c.size()); + referencedSize += ((CollectionEstimator) collEstimator).estimateOverhead(c.size()); } else { // We decided to treat this collection as regular object. - LlapIoImpl.LOG.trace("Verbose estimation for collection {} from {}", - fieldObj.getClass().getName(), e.field); + LOG.trace("Verbose estimation for collection {} from {}", fieldObj.getClass().getName(), + e.field); referencedSize += collEstimator.estimate(c, parent, uniqueObjects); } break; } case MAP: { - Map m = (Map)fieldObj; + Map m = (Map) fieldObj; ObjectEstimator collEstimator = parent.get(fieldObj.getClass()); if (collEstimator == null) { // We have no estimator for this type... assume low overhead and hope for the best. 
- LlapIoImpl.LOG.trace("Approximate estimation for map {} from {}", - fieldObj.getClass().getName(), e.field); + LOG.trace("Approximate estimation for map {} from {}", fieldObj.getClass().getName(), + e.field); referencedSize += memoryModel.object(); referencedSize += estimateMapElements(parent, m, e.field, uniqueObjects); - referencedSize += memoryModel.array() + m.size() - * (memoryModel.ref() * 2 + memoryModel.object()); + referencedSize += + memoryModel.array() + m.size() * (memoryModel.ref() * 2 + memoryModel.object()); } else if (collEstimator instanceof CollectionEstimator) { referencedSize += memoryModel.object(); referencedSize += estimateMapElements(parent, m, e.field, uniqueObjects); - referencedSize += ((CollectionEstimator)collEstimator).estimateOverhead(m.size()); + referencedSize += ((CollectionEstimator) collEstimator).estimateOverhead(m.size()); } else { // We decided to treat this map as regular object. - LlapIoImpl.LOG.trace("Verbose estimation for map {} from {}", - fieldObj.getClass().getName(), e.field); + LOG.trace("Verbose estimation for map {} from {}", fieldObj.getClass().getName(), + e.field); referencedSize += collEstimator.estimate(m, parent, uniqueObjects); } break; } case OBJECT_ARRAY: { int len = Array.getLength(fieldObj); - referencedSize += JavaDataModel.alignUp( - memoryModel.array() + len * memoryModel.ref(), memoryModel.memoryAlign()); - if (len == 0) continue; + referencedSize += JavaDataModel.alignUp(memoryModel.array() + len * memoryModel.ref(), + memoryModel.memoryAlign()); + if (len == 0) + continue; referencedSize += estimateArrayElements(parent, e, fieldObj, len, uniqueObjects); break; } @@ -463,13 +472,14 @@ protected int estimate(Object obj, HashMap, ObjectEstimator> parent, ObjectEstimator fieldEstimator = parent.get(fieldObj.getClass()); if (fieldEstimator == null) { // TODO: use reflection? - throw new AssertionError("Don't know how to measure " - + fieldObj.getClass().getName() + " from " + e.field); + throw new AssertionError( + "Don't know how to measure " + fieldObj.getClass().getName() + " from " + e.field); } referencedSize += fieldEstimator.estimate(fieldObj, parent, uniqueObjects); break; } - default: throw new AssertionError("Unknown type " + e.type); + default: + throw new AssertionError("Unknown type " + e.type); } } return directSize + referencedSize; @@ -482,16 +492,18 @@ private int estimateArrayElements(HashMap, ObjectEstimator> parent, Fie ObjectEstimator lastEstimator = parent.get(lastClass); for (int i = 0; i < len; ++i) { Object element = Array.get(fieldObj, i); - if (element == null) continue; - if (null != uniqueObjects.put(element, Boolean.TRUE)) continue; + if (element == null) + continue; + if (null != uniqueObjects.put(element, Boolean.TRUE)) + continue; Class elementClass = element.getClass(); if (lastClass != elementClass) { lastClass = elementClass; lastEstimator = parent.get(lastClass); if (lastEstimator == null) { // TODO: use reflection? 
- throw new AssertionError("Don't know how to measure element " - + lastClass.getName() + " from " + e.field); + throw new AssertionError( + "Don't know how to measure element " + lastClass.getName() + " from " + e.field); } } result += lastEstimator.estimate(element, parent, uniqueObjects); @@ -505,16 +517,18 @@ protected int estimateCollectionElements(HashMap, ObjectEstimator> pare Class lastClass = null; int result = 0; for (Object element : c) { - if (element == null) continue; - if (null != uniqueObjects.put(element, Boolean.TRUE)) continue; + if (element == null) + continue; + if (null != uniqueObjects.put(element, Boolean.TRUE)) + continue; Class elementClass = element.getClass(); if (lastClass != elementClass) { lastClass = elementClass; lastEstimator = parent.get(lastClass); if (lastEstimator == null) { // TODO: use reflection? - throw new AssertionError("Don't know how to measure element " - + lastClass.getName() + " from " + field); + throw new AssertionError( + "Don't know how to measure element " + lastClass.getName() + " from " + field); } } result += lastEstimator.estimate(element, parent, uniqueObjects); @@ -522,35 +536,38 @@ protected int estimateCollectionElements(HashMap, ObjectEstimator> pare return result; } - protected int estimateMapElements(HashMap, ObjectEstimator> parent, - Map m, Field field, IdentityHashMap uniqueObjects) { + protected int estimateMapElements(HashMap, ObjectEstimator> parent, Map m, + Field field, IdentityHashMap uniqueObjects) { ObjectEstimator keyEstimator = null, valueEstimator = null; Class lastKeyClass = null, lastValueClass = null; int result = 0; for (Map.Entry element : m.entrySet()) { Object key = element.getKey(), value = element.getValue(); - if (null != uniqueObjects.put(key, Boolean.TRUE)) continue; + if (null != uniqueObjects.put(key, Boolean.TRUE)) + continue; Class keyClass = key.getClass(); if (lastKeyClass != keyClass) { lastKeyClass = keyClass; keyEstimator = parent.get(lastKeyClass); if (keyEstimator == null) { // TODO: use reflection? - throw new AssertionError("Don't know how to measure key " - + lastKeyClass.getName() + " from " + field); + throw new AssertionError( + "Don't know how to measure key " + lastKeyClass.getName() + " from " + field); } } result += keyEstimator.estimate(element, parent, uniqueObjects); - if (value == null) continue; - if (null != uniqueObjects.put(value, Boolean.TRUE)) continue; + if (value == null) + continue; + if (null != uniqueObjects.put(value, Boolean.TRUE)) + continue; Class valueClass = value.getClass(); if (lastValueClass != valueClass) { lastValueClass = valueClass; valueEstimator = parent.get(lastValueClass); if (valueEstimator == null) { // TODO: use reflection? 
- throw new AssertionError("Don't know how to measure value " - + lastValueClass.getName() + " from " + field); + throw new AssertionError( + "Don't know how to measure value " + lastValueClass.getName() + " from " + field); } } result += valueEstimator.estimate(element, parent, uniqueObjects); @@ -572,14 +589,14 @@ public CollectionEstimator(int base, int perElement) { protected int estimate(Object obj, HashMap, ObjectEstimator> parent, IdentityHashMap uniqueObjects) { if (obj instanceof Collection) { - Collection c = (Collection)obj; - int overhead = estimateOverhead(c.size()), elements = estimateCollectionElements( - parent, c, null, uniqueObjects); + Collection c = (Collection) obj; + int overhead = estimateOverhead(c.size()), + elements = estimateCollectionElements(parent, c, null, uniqueObjects); return overhead + elements + memoryModel.object(); } else if (obj instanceof Map) { - Map m = (Map)obj; - int overhead = estimateOverhead(m.size()), elements = estimateMapElements( - parent, m, null, uniqueObjects); + Map m = (Map) obj; + int overhead = estimateOverhead(m.size()), + elements = estimateMapElements(parent, m, null, uniqueObjects); return overhead + elements + memoryModel.object(); } throw new AssertionError(obj.getClass().getName()); @@ -591,13 +608,13 @@ int estimateOverhead(int size) { } public static void addEstimator(String className, - HashMap, ObjectEstimator> sizeEstimators) { + HashMap, ObjectEstimator> sizeEstimators, Class topClass) { Class clazz = null; try { clazz = Class.forName(className); } catch (ClassNotFoundException e) { // Ignore and hope for the best. - LlapIoImpl.LOG.warn("Cannot find " + className); + LOG.warn("Cannot find " + className); return; } IncrementalObjectSizeEstimator.createEstimators(clazz, sizeEstimators); diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/util/TestJavaDataModel.java b/storage-api/src/test/org/apache/hadoop/hive/ql/util/TestJavaDataModel.java index b9fe7c40f8..7aef1ee2fb 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/util/TestJavaDataModel.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/util/TestJavaDataModel.java @@ -24,6 +24,8 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertSame; +import org.apache.hadoop.hive.ql.util.JavaDataModel; + public final class TestJavaDataModel { private static final String DATA_MODEL_PROPERTY = "sun.arch.data.model";
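Not part of the patch itself: with IncrementalObjectSizeEstimator relocated to org.apache.hadoop.hive.ql.util in storage-api, callers such as SharedCache build the estimator map once and then charge each object against a byte budget before admitting it to the cache. The following is a minimal, hypothetical sketch of that pattern; CachedValue, budgetBytes and usedBytes are illustrative stand-ins rather than names from the patch, and only the createEstimators/estimate calls come from the API shown above.

```java
import java.util.HashMap;

import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator;
import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator;

public class EstimatorUsageSketch {

  // Hypothetical value class; stands in for SharedCache's TableWrapper/PartitionWrapper.
  static class CachedValue {
    String name;
    long[] stats = new long[16];
  }

  public static void main(String[] args) {
    // The reflection work happens once here, not on every estimate() call.
    HashMap<Class<?>, ObjectEstimator> estimators =
        IncrementalObjectSizeEstimator.createEstimators(CachedValue.class);
    ObjectEstimator valueEstimator = estimators.get(CachedValue.class);

    CachedValue value = new CachedValue();
    value.name = "example";

    // Hypothetical budget and usage counter, mirroring the 80% admission check in SharedCache.
    long budgetBytes = 1024 * 1024;
    long usedBytes = 0;

    int estimated = valueEstimator.estimate(value, estimators);
    if (usedBytes + estimated > 0.8 * budgetBytes) {
      System.out.println("Would skip caching: estimated " + estimated + " bytes exceeds the budget");
    } else {
      usedBytes += estimated;
      System.out.println("Cached object, estimated " + estimated + " bytes, used " + usedBytes);
    }
  }
}
```

As in SharedCache.cachePartitions and populateTableInCache, the size is estimated before insertion, so an object that would exceed the budget is simply skipped rather than evicted later.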
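A second note, also outside the patch proper: the new metastore.cached.rawstore.max.cache.memory property defaults to 1073741824 bytes (1 GB) and is read once by CachedStore.initSharedCache(). The sketch below assumes the standard MetastoreConf helpers newMetastoreConf and setLongVar, which are not part of this change, and shows how the limit would be configured and read.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;

public class CacheMemoryConfigSketch {
  public static void main(String[] args) {
    Configuration conf = MetastoreConf.newMetastoreConf();

    // Raise the CachedStore object-cache cap from the 1 GB default added by this patch to 2 GB.
    MetastoreConf.setLongVar(conf, ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY,
        2L * 1024 * 1024 * 1024);

    // The same read that CachedStore.initSharedCache() performs before calling SharedCache.initialize().
    long maxBytes = MetastoreConf.getLongVar(conf, ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY);
    System.out.println("CachedStore cache cap: " + maxBytes / (1024.0 * 1024 * 1024) + " GB");
  }
}
```

Since SharedCache stops admitting new objects once roughly 80% of this limit is reached, the value should be sized against the expected volume of table and partition metadata rather than the full JVM heap.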