diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java index 2f7fa24558..30dc1b9da2 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java +++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/metadata/OrcFileEstimateErrors.java @@ -24,12 +24,12 @@ import org.apache.hadoop.hive.common.io.DiskRangeList; import org.apache.hadoop.hive.common.io.DataCache.BooleanRef; import org.apache.hadoop.hive.common.io.DiskRangeList.MutateHelper; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.cache.EvictionDispatcher; import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; import org.apache.hadoop.hive.ql.io.SyntheticFileId; import org.apache.hadoop.hive.ql.io.orc.encoded.IncompleteCb; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; public class OrcFileEstimateErrors extends LlapCacheableBuffer { private final Object fileKey; diff --git a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java index 0bbaf7e459..652a29f1d5 100644 --- a/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java +++ b/llap-server/src/test/org/apache/hadoop/hive/llap/cache/TestIncrementalObjectSizeEstimator.java @@ -34,13 +34,13 @@ import org.apache.orc.TypeDescription; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.llap.io.metadata.OrcFileMetadata; import org.apache.hadoop.hive.llap.io.metadata.OrcStripeMetadata; import org.apache.orc.impl.OrcIndex; import org.apache.orc.StripeInformation; import org.apache.hadoop.hive.ql.io.orc.encoded.OrcBatchKey; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.orc.OrcProto; import org.junit.Test; diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java index c47856de87..6312e4790b 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java @@ -18,12 +18,10 @@ package org.apache.hadoop.hive.metastore.cache; -import java.io.Closeable; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.HashMap; import java.util.LinkedList; import java.util.List; @@ -33,8 +31,6 @@ import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import 
java.util.regex.Pattern; @@ -100,7 +96,6 @@ import org.apache.hadoop.hive.metastore.api.SchemaVersion; import org.apache.hadoop.hive.metastore.api.SchemaVersionDescriptor; import org.apache.hadoop.hive.metastore.api.SerDeInfo; -import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; import org.apache.hadoop.hive.metastore.api.TableMeta; import org.apache.hadoop.hive.metastore.api.Type; @@ -161,9 +156,18 @@ public CachedStore() { public void setConf(Configuration conf) { setConfInternal(conf); initBlackListWhiteList(conf); + initSharedCache(conf); startCacheUpdateService(conf, false, true); } + private void initSharedCache(Configuration conf) { + long maxSharedCacheSizeInBytes = + MetastoreConf.getLongVar(conf, ConfVars.CACHED_RAW_STORE_MAX_CACHE_MEMORY); + sharedCache.initialize(maxSharedCacheSizeInBytes); + LOG.info("Maximum memory that the cache will use: {} GB", + maxSharedCacheSizeInBytes / (1024 * 1024 * 1024)); + } + /** * Similar to setConf but used from within the tests * This does start the background thread for prewarm and update @@ -205,18 +209,27 @@ static void prewarm(RawStore rawStore) { } long startTime = System.nanoTime(); LOG.info("Prewarming CachedStore"); + boolean isSuccess; while (!isCachePrewarmed.get()) { // Prevents throwing exceptions in our raw store calls since we're not using RawStoreProxy Deadline.registerIfNot(1000000); Collection catalogsToCache; try { catalogsToCache = catalogsToCache(rawStore); - LOG.info("Going to cache catalogs: " + - org.apache.commons.lang.StringUtils.join(catalogsToCache, ", ")); + LOG.info("Going to cache catalogs: " + + org.apache.commons.lang.StringUtils.join(catalogsToCache, ", ")); List catalogs = new ArrayList<>(catalogsToCache.size()); - for (String catName : catalogsToCache) catalogs.add(rawStore.getCatalog(catName)); - sharedCache.populateCatalogsInCache(catalogs); - } catch (MetaException|NoSuchObjectException e) { + for (String catName : catalogsToCache) + catalogs.add(rawStore.getCatalog(catName)); + isSuccess = sharedCache.populateCatalogsInCache(catalogs); + if (!isSuccess) { + LOG.info( + "Unable to cache catalogs, since the cache memory is full. " + + "Will stop attempting to cache any more objects."); + completePrewarm(startTime); + return; + } + } catch (MetaException | NoSuchObjectException e) { LOG.warn("Failed to populate catalogs in cache, going to try again", e); // try again continue; @@ -232,15 +245,22 @@ static void prewarm(RawStore rawStore) { databases.add(rawStore.getDatabase(catName, dbName)); } catch (NoSuchObjectException e) { // Continue with next database - LOG.warn("Failed to cache database " + - Warehouse.getCatalogQualifiedDbName(catName, dbName) + ", moving on", e); + LOG.warn("Failed to cache database " + + Warehouse.getCatalogQualifiedDbName(catName, dbName) + ", moving on", e); } } } catch (MetaException e) { LOG.warn("Failed to cache databases in catalog " + catName + ", moving on", e); } } - sharedCache.populateDatabasesInCache(databases); + isSuccess = sharedCache.populateDatabasesInCache(databases); + if (!isSuccess) { + LOG.info( + "Unable to cache databases, since the cache memory is full. " + + "Will stop attempting to cache any more objects."); + completePrewarm(startTime); + return; + } LOG.debug( "Databases cache is now prewarmed. 
Now adding tables, partitions and statistics to the cache"); int numberOfDatabasesCachedSoFar = 0; @@ -261,7 +281,6 @@ static void prewarm(RawStore rawStore) { tblName = StringUtils.normalizeIdentifier(tblName); if (!shouldCacheTable(catName, dbName, tblName)) { continue; - } Table table; try { @@ -324,8 +343,17 @@ static void prewarm(RawStore rawStore) { tableColStats = rawStore.getTableColumnStatistics(catName, dbName, tblName, colNames); Deadline.stopTimer(); } - sharedCache.populateTableInCache(table, tableColStats, partitions, partitionColStats, + // If the table could not be cached due to memory limit, stop prewarm + isSuccess = sharedCache.populateTableInCache(table, tableColStats, partitions, partitionColStats, aggrStatsAllPartitions, aggrStatsAllButDefaultPartition); + if (!isSuccess) { + LOG.info( + "Unable to cache database: {}'s table: {}, since the cache memory is full. " + + "Will stop attempting to cache any more tables.", + dbName, tblName); + completePrewarm(startTime); + return; + } } catch (MetaException | NoSuchObjectException e) { // Continue with next table continue; @@ -336,8 +364,12 @@ static void prewarm(RawStore rawStore) { LOG.debug("Processed database: {}. Cached {} / {} databases so far.", dbName, ++numberOfDatabasesCachedSoFar, databases.size()); } - isCachePrewarmed.set(true); } + completePrewarm(startTime); + } + + private static void completePrewarm(long startTime) { + isCachePrewarmed.set(true); LOG.info("CachedStore initialized"); long endTime = System.nanoTime(); LOG.info("Time taken in prewarming = " + (endTime - startTime) / 1000000 + "ms"); diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java index 89b400697b..765fd0a4c3 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/SharedCache.java @@ -33,7 +33,6 @@ import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.TreeMap; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -50,6 +49,8 @@ import org.apache.hadoop.hive.metastore.api.TableMeta; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; import org.apache.hadoop.hive.metastore.utils.StringUtils; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator; +import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -76,10 +77,12 @@ private HashSet tablesDeletedDuringPrewarm = new HashSet<>(); private AtomicBoolean isTableCacheDirty = new AtomicBoolean(false); private Map sdCache = new HashMap<>(); - private Configuration conf; private static MessageDigest md; static final private Logger LOG = LoggerFactory.getLogger(SharedCache.class.getName()); private AtomicLong cacheUpdateCount = new AtomicLong(0); + private static long maxCacheSizeInBytes; + private static long currentCacheSizeInBytes = 0; + private static HashMap, ObjectEstimator> sizeEstimators = null; enum StatsType { ALL(0), ALLBUTDEFAULT(1); @@ -103,6 +106,27 @@ public int getPosition() { } } + + public void initialize(long maxSharedCacheSizeInBytes) { + maxCacheSizeInBytes = maxSharedCacheSizeInBytes; + // Create estimators + if 
(sizeEstimators == null) { + sizeEstimators = IncrementalObjectSizeEstimator.createEstimators(SharedCache.class); + } + // Initialize the SharedCache size + ObjectEstimator sharedCacheEstimator = getMemorySizeEstimator(SharedCache.class); + currentCacheSizeInBytes = sharedCacheEstimator.estimate(this, sizeEstimators); + } + + static ObjectEstimator getMemorySizeEstimator(Class clazz) { + ObjectEstimator estimator = sizeEstimators.get(clazz); + if (estimator == null) { + IncrementalObjectSizeEstimator.createEstimators(clazz, sizeEstimators); + estimator = sizeEstimators.get(clazz); + } + return estimator; + } + static class TableWrapper { Table t; String location; @@ -116,7 +140,8 @@ public int getPosition() { private AtomicBoolean isTableColStatsCacheDirty = new AtomicBoolean(false); // For caching partition objects // Ket is partition values and the value is a wrapper around the partition object - private Map partitionCache = new ConcurrentHashMap(); + private Map partitionCache = + new ConcurrentHashMap(); private AtomicBoolean isPartitionCacheDirty = new AtomicBoolean(false); // For caching column stats for a partitioned table // Key is aggregate of partition values, column name and the value is the col stat object @@ -188,18 +213,26 @@ void cachePartition(Partition part, SharedCache sharedCache) { } } - void cachePartitions(List parts, SharedCache sharedCache) { + boolean cachePartitions(List parts, SharedCache sharedCache) { try { tableLock.writeLock().lock(); for (Partition part : parts) { - PartitionWrapper wrapper = makePartitionWrapper(part, sharedCache); - partitionCache.put(CacheUtils.buildPartitionCacheKey(part.getValues()), wrapper); + PartitionWrapper ptnWrapper = makePartitionWrapper(part, sharedCache); + ObjectEstimator ptnWrapperSizeEstimator = getMemorySizeEstimator(PartitionWrapper.class); + long estimatedMemUsage = ptnWrapperSizeEstimator.estimate(ptnWrapper, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } + partitionCache.put(CacheUtils.buildPartitionCacheKey(part.getValues()), ptnWrapper); isPartitionCacheDirty.set(true); } // Invalidate cached aggregate stats if (!aggrColStatsCache.isEmpty()) { aggrColStatsCache.clear(); } + return true; } finally { tableLock.writeLock().unlock(); } @@ -252,7 +285,8 @@ public Partition removePartition(List partVal, SharedCache sharedCache) Partition part = null; try { tableLock.writeLock().lock(); - PartitionWrapper wrapper = partitionCache.remove(CacheUtils.buildPartitionCacheKey(partVal)); + PartitionWrapper wrapper = + partitionCache.remove(CacheUtils.buildPartitionCacheKey(partVal)); isPartitionCacheDirty.set(true); if (wrapper.getSdHash() != null) { sharedCache.decrSd(wrapper.getSdHash()); @@ -340,8 +374,10 @@ public void refreshPartitions(List partitions, SharedCache sharedCach } } - public void updateTableColStats(List colStatsForTable) { + public boolean updateTableColStats(List colStatsForTable) { try { + ObjectEstimator tblColStatsSizeEstimator = + getMemorySizeEstimator(ColumnStatisticsObj.class); tableLock.writeLock().lock(); for (ColumnStatisticsObj colStatObj : colStatsForTable) { // Get old stats object if present @@ -353,10 +389,17 @@ public void updateTableColStats(List colStatsForTable) { } else { // No stats exist for this key; add a new object to the cache // TODO: get rid of deepCopy after making sure callers don't use references + long estimatedMemUsage = tblColStatsSizeEstimator.estimate(colStatObj, sizeEstimators); + 
if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } tableColStatsCache.put(key, colStatObj.deepCopy()); } } isTableColStatsCacheDirty.set(true); + return true; } finally { tableLock.writeLock().unlock(); } @@ -412,17 +455,38 @@ public void removeTableColStats(String colName) { public ColumnStatisticsObj getPartitionColStats(List partVal, String colName) { try { tableLock.readLock().lock(); - return partitionColStatsCache.get(CacheUtils.buildPartitonColStatsCacheKey(partVal, colName)); + return partitionColStatsCache + .get(CacheUtils.buildPartitonColStatsCacheKey(partVal, colName)); } finally { tableLock.readLock().unlock(); } } - public void updatePartitionColStats(List partVal, + public boolean updatePartitionColStats(List partVal, List colStatsObjs) { try { tableLock.writeLock().lock(); - addPartitionColStatsToCache(partVal, colStatsObjs); + ObjectEstimator ptnColStatsSizeEstimator = + getMemorySizeEstimator(ColumnStatisticsObj.class); + for (ColumnStatisticsObj colStatObj : colStatsObjs) { + // Get old stats object if present + String key = CacheUtils.buildPartitonColStatsCacheKey(partVal, colStatObj.getColName()); + ColumnStatisticsObj oldStatsObj = partitionColStatsCache.get(key); + if (oldStatsObj != null) { + // Update existing stat object's field + StatObjectConverter.setFieldsIntoOldStats(oldStatsObj, colStatObj); + } else { + // No stats exist for this key; add a new object to the cache + // TODO: get rid of deepCopy after making sure callers don't use references + long estimatedMemUsage = ptnColStatsSizeEstimator.estimate(colStatObj, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } + partitionColStatsCache.put(key, colStatObj.deepCopy()); + } + } isPartitionColStatsCacheDirty.set(true); // Invalidate cached aggregate stats if (!aggrColStatsCache.isEmpty()) { @@ -431,6 +495,7 @@ public void updatePartitionColStats(List partVal, } finally { tableLock.writeLock().unlock(); } + return true; } public void removePartitionColStats(List partVals, String colName) { @@ -447,23 +512,6 @@ public void removePartitionColStats(List partVals, String colName) { } } - private void addPartitionColStatsToCache(List partVal, - List colStatsObjs) { - for (ColumnStatisticsObj colStatObj : colStatsObjs) { - // Get old stats object if present - String key = CacheUtils.buildPartitonColStatsCacheKey(partVal, colStatObj.getColName()); - ColumnStatisticsObj oldStatsObj = partitionColStatsCache.get(key); - if (oldStatsObj != null) { - // Update existing stat object's field - StatObjectConverter.setFieldsIntoOldStats(oldStatsObj, colStatObj); - } else { - // No stats exist for this key; add a new object to the cache - // TODO: get rid of deepCopy after making sure callers don't use references - partitionColStatsCache.put(key, colStatObj.deepCopy()); - } - } - } - public void refreshPartitionColStats(List partitionColStats) { Map newPartitionColStatsCache = new HashMap(); @@ -486,7 +534,8 @@ public void refreshPartitionColStats(List partitionColStats) { + getTable().getTableName() + "; the partition column list we have is dirty"); return; } - String key = CacheUtils.buildPartitonColStatsCacheKey(partVal, colStatObj.getColName()); + String key = + CacheUtils.buildPartitonColStatsCacheKey(partVal, colStatObj.getColName()); newPartitionColStatsCache.put(key, colStatObj.deepCopy()); } } catch (MetaException e) { @@ -687,11 +736,18 @@ public int 
getRefCount() { } } - public void populateCatalogsInCache(Collection catalogs) { + public boolean populateCatalogsInCache(Collection catalogs) { for (Catalog cat : catalogs) { Catalog catCopy = cat.deepCopy(); // ObjectStore also stores db name in lowercase catCopy.setName(catCopy.getName().toLowerCase()); + ObjectEstimator catalogSizeEstimator = getMemorySizeEstimator(Catalog.class); + long estimatedMemUsage = catalogSizeEstimator.estimate(cat, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } try { cacheLock.writeLock().lock(); // Since we allow write operations on cache while prewarm is happening: @@ -708,6 +764,7 @@ public void populateCatalogsInCache(Collection catalogs) { cacheLock.writeLock().unlock(); } } + return true; } public Catalog getCatalogFromCache(String name) { @@ -790,18 +847,26 @@ public Database getDatabaseFromCache(String catName, String name) { return db; } - public void populateDatabasesInCache(List databases) { + public boolean populateDatabasesInCache(List databases) { for (Database db : databases) { Database dbCopy = db.deepCopy(); // ObjectStore also stores db name in lowercase dbCopy.setName(dbCopy.getName().toLowerCase()); + ObjectEstimator databaseSizeEstimator = getMemorySizeEstimator(Database.class); + long estimatedMemUsage = databaseSizeEstimator.estimate(db, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } try { cacheLock.writeLock().lock(); // Since we allow write operations on cache while prewarm is happening: // 1. Don't add databases that were deleted while we were preparing list for prewarm // 2. Skip overwriting exisiting db object // (which is present because it was added after prewarm started) - String key = CacheUtils.buildDbKey(dbCopy.getCatalogName().toLowerCase(), dbCopy.getName().toLowerCase()); + String key = CacheUtils.buildDbKey(dbCopy.getCatalogName().toLowerCase(), + dbCopy.getName().toLowerCase()); if (databasesDeletedDuringPrewarm.contains(key)) { continue; } @@ -812,6 +877,7 @@ public void populateDatabasesInCache(List databases) { cacheLock.writeLock().unlock(); } } + return true; } public boolean isDatabaseCachePrewarmed() { @@ -855,7 +921,8 @@ public void removeDatabaseFromCache(String catName, String dbName) { cacheLock.readLock().lock(); for (String pair : databaseCache.keySet()) { String[] n = CacheUtils.splitDbName(pair); - if (catName.equals(n[0])) results.add(n[1]); + if (catName.equals(n[0])) + results.add(n[1]); } } finally { cacheLock.readLock().unlock(); @@ -883,8 +950,8 @@ public void removeDatabaseFromCache(String catName, String dbName) { } /** - * Replaces the old db object with the new one. - * This will add the new database to cache if it does not exist. + * Replaces the old db object with the new one. This will add the new database to cache if it does + * not exist. 
*/ public void alterDatabaseInCache(String catName, String dbName, Database newDb) { try { @@ -922,7 +989,7 @@ public int getCachedDatabaseCount() { } } - public void populateTableInCache(Table table, ColumnStatistics tableColStats, + public boolean populateTableInCache(Table table, ColumnStatistics tableColStats, List partitions, List partitionColStats, AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { String catName = StringUtils.normalizeIdentifier(table.getCatName()); @@ -931,14 +998,26 @@ public void populateTableInCache(Table table, ColumnStatistics tableColStats, // Since we allow write operations on cache while prewarm is happening: // 1. Don't add tables that were deleted while we were preparing list for prewarm if (tablesDeletedDuringPrewarm.contains(CacheUtils.buildTableKey(catName, dbName, tableName))) { - return; + return false; } TableWrapper tblWrapper = createTableWrapper(catName, dbName, tableName, table); + ObjectEstimator tblWrapperSizeEstimator = getMemorySizeEstimator(TableWrapper.class); + long estimatedMemUsage = tblWrapperSizeEstimator.estimate(tblWrapper, sizeEstimators); + if (isCacheMemoryFull(estimatedMemUsage)) { + return false; + } else { + currentCacheSizeInBytes += estimatedMemUsage; + } if (!table.isSetPartitionKeys() && (tableColStats != null)) { - tblWrapper.updateTableColStats(tableColStats.getStatsObj()); + if (!tblWrapper.updateTableColStats(tableColStats.getStatsObj())) { + return false; + } } else { if (partitions != null) { - tblWrapper.cachePartitions(partitions, this); + // If the partitions were not added due to memory limit, return false + if (!tblWrapper.cachePartitions(partitions, this)) { + return false; + } } if (partitionColStats != null) { for (ColumnStatistics cs : partitionColStats) { @@ -946,7 +1025,9 @@ public void populateTableInCache(Table table, ColumnStatistics tableColStats, try { partVal = Warehouse.makeValsFromName(cs.getStatsDesc().getPartName(), null); List colStats = cs.getStatsObj(); - tblWrapper.updatePartitionColStats(partVal, colStats); + if (!tblWrapper.updatePartitionColStats(partVal, colStats)) { + return false; + } } catch (MetaException e) { LOG.debug("Unable to cache partition column stats for table: " + tableName, e); } @@ -960,12 +1041,15 @@ public void populateTableInCache(Table table, ColumnStatistics tableColStats, // 2. 
Skip overwriting exisiting table object // (which is present because it was added after prewarm started) tableCache.putIfAbsent(CacheUtils.buildTableKey(catName, dbName, tableName), tblWrapper); + return true; } finally { cacheLock.writeLock().unlock(); } } - + private static boolean isCacheMemoryFull(long estimatedMemUsage) { + return (0.8 * maxCacheSizeInBytes) < (currentCacheSizeInBytes + estimatedMemUsage); + } public void completeTableCachePrewarm() { try { @@ -981,7 +1065,8 @@ public Table getTableFromCache(String catName, String dbName, String tableName) Table t = null; try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { t = CacheUtils.assemble(tblWrapper, this); } @@ -1003,7 +1088,8 @@ public TableWrapper addTableToCache(String catName, String dbName, String tblNam } } - private TableWrapper createTableWrapper(String catName, String dbName, String tblName, Table tbl) { + private TableWrapper createTableWrapper(String catName, String dbName, String tblName, + Table tbl) { TableWrapper wrapper; Table tblCopy = tbl.deepCopy(); tblCopy.setCatName(normalizeIdentifier(catName)); @@ -1026,7 +1112,6 @@ private TableWrapper createTableWrapper(String catName, String dbName, String tb return wrapper; } - public void removeTableFromCache(String catName, String dbName, String tblName) { try { cacheLock.writeLock().lock(); @@ -1035,7 +1120,8 @@ public void removeTableFromCache(String catName, String dbName, String tblName) if (!isTableCachePrewarmed) { tablesDeletedDuringPrewarm.add(CacheUtils.buildTableKey(catName, dbName, tblName)); } - TableWrapper tblWrapper = tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); + TableWrapper tblWrapper = + tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); byte[] sdHash = tblWrapper.getSdHash(); if (sdHash != null) { decrSd(sdHash); @@ -1049,7 +1135,8 @@ public void removeTableFromCache(String catName, String dbName, String tblName) public void alterTableInCache(String catName, String dbName, String tblName, Table newTable) { try { cacheLock.writeLock().lock(); - TableWrapper tblWrapper = tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); + TableWrapper tblWrapper = + tableCache.remove(CacheUtils.buildTableKey(catName, dbName, tblName)); if (tblWrapper != null) { tblWrapper.updateTableObj(newTable, this); String newDbName = StringUtils.normalizeIdentifier(newTable.getDbName()); @@ -1092,7 +1179,8 @@ public void alterTableInCache(String catName, String dbName, String tblName, Tab return tableNames; } - public List listCachedTableNames(String catName, String dbName, String pattern, short maxTables) { + public List listCachedTableNames(String catName, String dbName, String pattern, + short maxTables) { List tableNames = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1111,7 +1199,8 @@ public void alterTableInCache(String catName, String dbName, String tblName, Tab return tableNames; } - public List listCachedTableNames(String catName, String dbName, String pattern, TableType tableType) { + public List listCachedTableNames(String catName, String dbName, String pattern, + TableType tableType) { List tableNames = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1138,7 +1227,8 @@ public void refreshTablesInCache(String catName, String dbName, List tabl Map newTableCache = new 
HashMap<>(); for (Table tbl : tables) { String tblName = StringUtils.normalizeIdentifier(tbl.getTableName()); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); if (tblWrapper != null) { tblWrapper.updateTableObj(tbl, this); } else { @@ -1153,8 +1243,8 @@ public void refreshTablesInCache(String catName, String dbName, List
tabl } } - public List getTableColStatsFromCache( - String catName, String dbName, String tblName, List colNames) { + public List getTableColStatsFromCache(String catName, String dbName, + String tblName, List colNames) { List colStatObjs = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1168,7 +1258,8 @@ public void refreshTablesInCache(String catName, String dbName, List
tabl return colStatObjs; } - public void removeTableColStatsFromCache(String catName, String dbName, String tblName, String colName) { + public void removeTableColStatsFromCache(String catName, String dbName, String tblName, + String colName) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1181,10 +1272,11 @@ public void removeTableColStatsFromCache(String catName, String dbName, String t } public void updateTableColStatsInCache(String catName, String dbName, String tableName, - List colStatsForTable) { + List colStatsForTable) { try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { tblWrapper.updateTableColStats(colStatsForTable); } @@ -1194,10 +1286,11 @@ public void updateTableColStatsInCache(String catName, String dbName, String tab } public void refreshTableColStatsInCache(String catName, String dbName, String tableName, - List colStatsForTable) { + List colStatsForTable) { try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { tblWrapper.refreshTableColStats(colStatsForTable); } @@ -1216,7 +1309,7 @@ public int getCachedTableCount() { } public List getTableMeta(String catName, String dbNames, String tableNames, - List tableTypes) { + List tableTypes) { List tableMetas = new ArrayList<>(); try { cacheLock.readLock().lock(); @@ -1253,7 +1346,8 @@ public void addPartitionToCache(String catName, String dbName, String tblName, P } } - public void addPartitionsToCache(String catName, String dbName, String tblName, List parts) { + public void addPartitionsToCache(String catName, String dbName, String tblName, + List parts) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1266,7 +1360,7 @@ public void addPartitionsToCache(String catName, String dbName, String tblName, } public Partition getPartitionFromCache(String catName, String dbName, String tblName, - List partVals) { + List partVals) { Partition part = null; try { cacheLock.readLock().lock(); @@ -1280,7 +1374,8 @@ public Partition getPartitionFromCache(String catName, String dbName, String tbl return part; } - public boolean existPartitionFromCache(String catName, String dbName, String tblName, List partVals) { + public boolean existPartitionFromCache(String catName, String dbName, String tblName, + List partVals) { boolean existsPart = false; try { cacheLock.readLock().lock(); @@ -1295,7 +1390,7 @@ public boolean existPartitionFromCache(String catName, String dbName, String tbl } public Partition removePartitionFromCache(String catName, String dbName, String tblName, - List partVals) { + List partVals) { Partition part = null; try { cacheLock.readLock().lock(); @@ -1310,7 +1405,7 @@ public Partition removePartitionFromCache(String catName, String dbName, String } public void removePartitionsFromCache(String catName, String dbName, String tblName, - List> partVals) { + List> partVals) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1322,7 +1417,8 @@ public void 
removePartitionsFromCache(String catName, String dbName, String tblN } } - public List listCachedPartitions(String catName, String dbName, String tblName, int max) { + public List listCachedPartitions(String catName, String dbName, String tblName, + int max) { List parts = new ArrayList(); try { cacheLock.readLock().lock(); @@ -1336,8 +1432,8 @@ public void removePartitionsFromCache(String catName, String dbName, String tblN return parts; } - public void alterPartitionInCache(String catName, String dbName, String tblName, List partVals, - Partition newPart) { + public void alterPartitionInCache(String catName, String dbName, String tblName, + List partVals, Partition newPart) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1349,8 +1445,8 @@ public void alterPartitionInCache(String catName, String dbName, String tblName, } } - public void alterPartitionsInCache(String catName, String dbName, String tblName, List> partValsList, - List newParts) { + public void alterPartitionsInCache(String catName, String dbName, String tblName, + List> partValsList, List newParts) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1362,7 +1458,8 @@ public void alterPartitionsInCache(String catName, String dbName, String tblName } } - public void refreshPartitionsInCache(String catName, String dbName, String tblName, List partitions) { + public void refreshPartitionsInCache(String catName, String dbName, String tblName, + List partitions) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1375,7 +1472,7 @@ public void refreshPartitionsInCache(String catName, String dbName, String tblNa } public void removePartitionColStatsFromCache(String catName, String dbName, String tblName, - List partVals, String colName) { + List partVals, String colName) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1388,10 +1485,11 @@ public void removePartitionColStatsFromCache(String catName, String dbName, Stri } public void updatePartitionColStatsInCache(String catName, String dbName, String tableName, - List partVals, List colStatsObjs) { + List partVals, List colStatsObjs) { try { cacheLock.readLock().lock(); - TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); + TableWrapper tblWrapper = + tableCache.get(CacheUtils.buildTableKey(catName, dbName, tableName)); if (tblWrapper != null) { tblWrapper.updatePartitionColStats(partVals, colStatsObjs); } @@ -1400,13 +1498,13 @@ public void updatePartitionColStatsInCache(String catName, String dbName, String } } - public ColumnStatisticsObj getPartitionColStatsFromCache(String catName, String dbName, String tblName, - List partVal, String colName) { + public ColumnStatisticsObj getPartitionColStatsFromCache(String catName, String dbName, + String tblName, List partVal, String colName) { ColumnStatisticsObj colStatObj = null; try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); - if (tblWrapper != null){ + if (tblWrapper != null) { colStatObj = tblWrapper.getPartitionColStats(partVal, colName); } } finally { @@ -1416,7 +1514,7 @@ public ColumnStatisticsObj getPartitionColStatsFromCache(String catName, String } public void 
refreshPartitionColStatsInCache(String catName, String dbName, String tblName, - List partitionColStats) { + List partitionColStats) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1428,8 +1526,8 @@ public void refreshPartitionColStatsInCache(String catName, String dbName, Strin } } - public List getAggrStatsFromCache(String catName, String dbName, String tblName, - List colNames, StatsType statsType) { + public List getAggrStatsFromCache(String catName, String dbName, + String tblName, List colNames, StatsType statsType) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1443,11 +1541,11 @@ public void refreshPartitionColStatsInCache(String catName, String dbName, Strin } public void addAggregateStatsToCache(String catName, String dbName, String tblName, - AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { + AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); - if (tblWrapper != null){ + if (tblWrapper != null) { tblWrapper.cacheAggrPartitionColStats(aggrStatsAllPartitions, aggrStatsAllButDefaultPartition); } @@ -1457,7 +1555,7 @@ public void addAggregateStatsToCache(String catName, String dbName, String tblNa } public void refreshAggregateStatsInCache(String catName, String dbName, String tblName, - AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { + AggrStats aggrStatsAllPartitions, AggrStats aggrStatsAllButDefaultPartition) { try { cacheLock.readLock().lock(); TableWrapper tblWrapper = tableCache.get(CacheUtils.buildTableKey(catName, dbName, tblName)); @@ -1529,8 +1627,3 @@ public void incrementUpdateCount() { cacheUpdateCount.incrementAndGet(); } } - - - - - diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 995137f967..c8af96963f 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -303,6 +303,9 @@ public static ConfVars getMetaConf(String name) { "This can be used in conjunction with hive.metastore.cached.rawstore.cached.object.whitelist. \n" + "Example: db2.*, db3\\.tbl1, db3\\..*. The last item can potentially override patterns specified before. 
\n" + "The blacklist also overrides the whitelist."), + CACHED_RAW_STORE_MAX_CACHE_MEMORY("metastore.cached.rawstore.max.cache.memory", + "hive.metastore.cached.rawstore.max.cache.memory", "1Gb", new Validator.SizeValidator(), + "The maximum memory in bytes that the cached objects can use."), CAPABILITY_CHECK("metastore.client.capability.check", "hive.metastore.client.capability.check", true, "Whether to check client capabilities for potentially breaking API usage."), @@ -1779,6 +1782,98 @@ private String timeString(long time, TimeUnit timeUnit) { return time + " " + timeAbbreviationFor(timeUnit); } } + + class SizeValidator implements Validator { + private final Long min; + private final boolean minInclusive; + + private final Long max; + private final boolean maxInclusive; + + public SizeValidator() { + this(null, false, null, false); + } + + public SizeValidator(Long min, boolean minInclusive, Long max, boolean maxInclusive) { + this.min = min; + this.minInclusive = minInclusive; + this.max = max; + this.maxInclusive = maxInclusive; + } + + @Override + public void validate(String value) { + long size = toSizeBytes(value); + if (min != null && (minInclusive ? size < min : size <= min)) { + throw new IllegalArgumentException( + value + " is smaller than minimum " + min + sizeString(min)); + } + if (max != null && (maxInclusive ? size > max : size >= max)) { + throw new IllegalArgumentException( + value + " is larger than maximum " + max + sizeString(max)); + } + } + + public String toDescription() { + String description = + "Expects a byte size value with unit (blank for bytes, kb, mb, gb, tb, pb)"; + if (min != null && max != null) { + description += ".\nThe size should be in between " + sizeString(min) + + (minInclusive ? " (inclusive)" : " (exclusive)") + " and " + sizeString(max) + + (maxInclusive ? " (inclusive)" : " (exclusive)"); + } else if (min != null) { + description += ".\nThe time should be bigger than " + (minInclusive ? "or equal to " : "") + + sizeString(min); + } else if (max != null) { + description += ".\nThe size should be smaller than " + + (maxInclusive ? "or equal to " : "") + sizeString(max); + } + return description; + } + + private String sizeString(long size) { + final String[] units = { " bytes", "Kb", "Mb", "Gb", "Tb" }; + long current = 1; + for (int i = 0; i < units.length && current > 0; ++i) { + long next = current << 10; + if ((size & (next - 1)) != 0) + return (long) (size / current) + units[i]; + current = next; + } + return current > 0 ? 
((long) (size / current) + "Pb") : (size + units[0]); + } + } + } + + public static long toSizeBytes(String value) { + String[] parsed = parseNumberFollowedByUnit(value.trim()); + return Long.parseLong(parsed[0].trim()) * multiplierFor(parsed[1].trim()); + } + + private static String[] parseNumberFollowedByUnit(String value) { + char[] chars = value.toCharArray(); + int i = 0; + for (; i < chars.length && (chars[i] == '-' || Character.isDigit(chars[i])); i++) { + } + return new String[] { value.substring(0, i), value.substring(i) }; + } + + public static long multiplierFor(String unit) { + unit = unit.trim().toLowerCase(); + if (unit.isEmpty() || unit.equals("b") || unit.equals("bytes")) { + return 1; + } else if (unit.equals("kb")) { + return 1024; + } else if (unit.equals("mb")) { + return 1024 * 1024; + } else if (unit.equals("gb")) { + return 1024 * 1024 * 1024; + } else if (unit.equals("tb")) { + return 1024L * 1024 * 1024 * 1024; + } else if (unit.equals("pb")) { + return 1024L * 1024 * 1024 * 1024 * 1024; + } + throw new IllegalArgumentException("Invalid size unit " + unit); } } diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/IncrementalObjectSizeEstimator.java b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java similarity index 72% rename from llap-server/src/java/org/apache/hadoop/hive/llap/IncrementalObjectSizeEstimator.java rename to storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java index 6f4ec6f1ea..100124a868 100644 --- a/llap-server/src/java/org/apache/hadoop/hive/llap/IncrementalObjectSizeEstimator.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/util/IncrementalObjectSizeEstimator.java @@ -15,10 +15,8 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.llap; +package org.apache.hadoop.hive.ql.util; -import com.google.common.collect.Lists; -import com.google.protobuf.UnknownFieldSet; import java.lang.reflect.AccessibleObject; import java.lang.reflect.Array; import java.lang.reflect.Field; @@ -37,19 +35,23 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.llap.IncrementalObjectSizeEstimator.ObjectEstimator; -import org.apache.hadoop.hive.llap.cache.LlapCacheableBuffer; -import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl; -import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Creates size estimators for java objects. The estimators attempt to do most of the reflection * work at initialization time, and also take some shortcuts, to minimize the amount of work done - * during the actual estimation. TODO: clean up + * during the actual estimation. 
+ * TODO: clean up */ public class IncrementalObjectSizeEstimator { public static final JavaDataModel memoryModel = JavaDataModel.get(); - private enum FieldType { PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER }; + static final private Logger LOG = + LoggerFactory.getLogger(IncrementalObjectSizeEstimator.class.getName()); + + private enum FieldType { + PRIMITIVE_ARRAY, OBJECT_ARRAY, COLLECTION, MAP, OTHER + }; public static HashMap, ObjectEstimator> createEstimators(Object rootObj) { HashMap, ObjectEstimator> byType = new HashMap<>(); @@ -67,7 +69,7 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti Object obj = stack.pop(); Class clazz; if (obj instanceof Class) { - clazz = (Class)obj; + clazz = (Class) obj; obj = null; } else { clazz = obj.getClass(); @@ -78,12 +80,15 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti // The object was added later for the same class; see addToProcessing. continue; } - if (estimator.isProcessed()) continue; + if (estimator.isProcessed()) + continue; estimator.init(); - for (Field field : getAllFields(clazz, LlapCacheableBuffer.class)) { + for (Field field : getAllFields(clazz)) { Class fieldClass = field.getType(); - if (Modifier.isStatic(field.getModifiers())) continue; - if (Class.class.isAssignableFrom(fieldClass)) continue; + if (Modifier.isStatic(field.getModifiers())) + continue; + if (Class.class.isAssignableFrom(fieldClass)) + continue; if (fieldClass.isPrimitive()) { estimator.addPrimitive(fieldClass); continue; @@ -116,8 +121,8 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti addToProcessing(byType, stack, fieldObj, fieldClass); } } - estimator.directSize = (int) JavaDataModel.alignUp( - estimator.directSize, memoryModel.memoryAlign()); + estimator.directSize = + (int) JavaDataModel.alignUp(estimator.directSize, memoryModel.memoryAlign()); } } @@ -126,7 +131,7 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti Deque stack = new ArrayDeque(32); Class rootClass = rootObj.getClass(); if (Class.class.equals(rootClass)) { - rootClass = (Class)rootObj; + rootClass = (Class) rootObj; rootObj = null; } else { // If root object is an array, map or collection, add estimators as for fields @@ -142,22 +147,16 @@ public static void createEstimators(Object rootObj, HashMap, ObjectEsti return stack; } - private static void addHardcodedEstimators( - HashMap, ObjectEstimator> byType) { + private static void addHardcodedEstimators(HashMap, ObjectEstimator> byType) { // Add hacks for well-known collections and maps to avoid estimating them. - byType.put(ArrayList.class, new CollectionEstimator( - memoryModel.arrayList(), memoryModel.ref())); - byType.put(LinkedList.class, new CollectionEstimator( - memoryModel.linkedListBase(), memoryModel.linkedListEntry())); - byType.put(HashSet.class, new CollectionEstimator( - memoryModel.hashSetBase(), memoryModel.hashSetEntry())); - byType.put(HashMap.class, new CollectionEstimator( - memoryModel.hashMapBase(), memoryModel.hashMapEntry())); - // Add a hack for UnknownFieldSet because we assume it will never have anything (TODO: clear?) 
- ObjectEstimator ufsEstimator = new ObjectEstimator(false); - ufsEstimator.directSize = memoryModel.object() * 2 + memoryModel.ref(); - byType.put(UnknownFieldSet.class, ufsEstimator); - // TODO: 1-field hack for UnmodifiableCollection for protobuf too + byType.put(ArrayList.class, + new CollectionEstimator(memoryModel.arrayList(), memoryModel.ref())); + byType.put(LinkedList.class, + new CollectionEstimator(memoryModel.linkedListBase(), memoryModel.linkedListEntry())); + byType.put(HashSet.class, + new CollectionEstimator(memoryModel.hashSetBase(), memoryModel.hashSetEntry())); + byType.put(HashMap.class, + new CollectionEstimator(memoryModel.hashMapBase(), memoryModel.hashMapEntry())); } private static Object extractFieldObj(Object obj, Field field) { @@ -170,7 +169,8 @@ private static Object extractFieldObj(Object obj, Field field) { private static Class determineRealClass(HashMap, ObjectEstimator> byType, Deque stack, Field field, Class fieldClass, Object fieldObj) { - if (fieldObj == null) return fieldClass; + if (fieldObj == null) + return fieldClass; Class realFieldClass = fieldObj.getClass(); if (!fieldClass.equals(realFieldClass)) { addToProcessing(byType, stack, null, fieldClass); @@ -183,10 +183,10 @@ private static void addCollectionEstimator(HashMap, ObjectEstimator> by Deque stack, Field field, Class fieldClass, Object fieldObj) { Collection fieldCol = null; if (fieldObj != null) { - fieldCol = (Collection)fieldObj; + fieldCol = (Collection) fieldObj; if (fieldCol.size() == 0) { fieldCol = null; - LlapIoImpl.LOG.trace("Empty collection {}", field); + LOG.trace("Empty collection {}", field); } } if (fieldCol != null) { @@ -202,8 +202,8 @@ private static void addCollectionEstimator(HashMap, ObjectEstimator> by addToProcessing(byType, stack, null, collectionArg); } // TODO: there was code here to create guess-estimate for collection wrt how usage changes - // when removing elements. However it's too error-prone for anything involving - // pre-allocated capacity, so it was discarded. + // when removing elements. However it's too error-prone for anything involving + // pre-allocated capacity, so it was discarded. // We will estimate collection as an object (only if it's a field). addToProcessing(byType, stack, fieldObj, fieldClass); @@ -214,10 +214,10 @@ private static void addMapEstimator(HashMap, ObjectEstimator> byType, Deque stack, Field field, Class fieldClass, Object fieldObj) { Map fieldCol = null; if (fieldObj != null) { - fieldCol = (Map)fieldObj; + fieldCol = (Map) fieldObj; if (fieldCol.size() == 0) { fieldCol = null; - LlapIoImpl.LOG.trace("Empty map {}", field); + LOG.trace("Empty map {}", field); } } if (fieldCol != null) { @@ -248,16 +248,16 @@ private static void addMapEstimator(HashMap, ObjectEstimator> byType, // TODO: this makes many assumptions, e.g. on how generic args are done Type genericType = field.getGenericType(); if (genericType instanceof ParameterizedType) { - Type[] types = ((ParameterizedType)genericType).getActualTypeArguments(); + Type[] types = ((ParameterizedType) genericType).getActualTypeArguments(); if (types.length == 2 && types[0] instanceof Class && types[1] instanceof Class) { - return new Class[] { (Class)types[0], (Class)types[1] }; + return new Class[] { (Class) types[0], (Class) types[1] }; } else { // TODO: we could try to get the declaring object and infer argument... stupid Java. 
- LlapIoImpl.LOG.trace("Cannot determine map type: {}", field); + LOG.trace("Cannot determine map type: {}", field); } } else { // TODO: we could try to get superclass or generic interfaces. - LlapIoImpl.LOG.trace("Non-parametrized map type: {}", field); + LOG.trace("Non-parametrized map type: {}", field); } return null; } @@ -266,26 +266,26 @@ private static void addMapEstimator(HashMap, ObjectEstimator> byType, // TODO: this makes many assumptions, e.g. on how generic args are done Type genericType = field.getGenericType(); if (genericType instanceof ParameterizedType) { - Type type = ((ParameterizedType)genericType).getActualTypeArguments()[0]; + Type type = ((ParameterizedType) genericType).getActualTypeArguments()[0]; if (type instanceof Class) { - return (Class)type; + return (Class) type; } else { // TODO: we could try to get the declaring object and infer argument... stupid Java. - LlapIoImpl.LOG.trace("Cannot determine collection type: {}", field); + LOG.trace("Cannot determine collection type: {}", field); } } else { // TODO: we could try to get superclass or generic interfaces. - LlapIoImpl.LOG.trace("Non-parametrized collection type: {}", field); + LOG.trace("Non-parametrized collection type: {}", field); } return null; } - private static void addArrayEstimator( - HashMap, ObjectEstimator> byType, Deque stack, - Field field, Object fieldObj) { - if (fieldObj == null) return; + private static void addArrayEstimator(HashMap, ObjectEstimator> byType, + Deque stack, Field field, Object fieldObj) { + if (fieldObj == null) + return; int arrayLen = Array.getLength(fieldObj); - LlapIoImpl.LOG.trace("Empty array {}", field); + LOG.trace("Empty array {}", field); for (int i = 0; i < arrayLen; ++i) { Object element = Array.get(fieldObj, i); if (element != null) { @@ -299,9 +299,11 @@ private static void addArrayEstimator( private static void addToProcessing(HashMap, ObjectEstimator> byType, Deque stack, Object element, Class elementClass) { ObjectEstimator existing = byType.get(elementClass); - if (existing != null && (!existing.isFromClass || (element == null))) return; + if (existing != null && (!existing.isFromClass || (element == null))) + return; if (elementClass.isInterface()) { - if (element == null) return; + if (element == null) + return; elementClass = element.getClass(); } byType.put(elementClass, new ObjectEstimator(element == null)); @@ -309,22 +311,25 @@ private static void addToProcessing(HashMap, ObjectEstimator> byType, } private static int getPrimitiveSize(Class fieldClass) { - if (fieldClass == long.class || fieldClass == double.class) return 8; - if (fieldClass == int.class || fieldClass == float.class) return 4; - if (fieldClass == short.class || fieldClass == char.class) return 2; - if (fieldClass == byte.class || fieldClass == boolean.class) return 1; - throw new AssertionError("Unrecognized primitive " + fieldClass.getName()); + if (fieldClass == long.class || fieldClass == double.class) + return 8; + if (fieldClass == int.class || fieldClass == float.class) + return 4; + if (fieldClass == short.class || fieldClass == char.class) + return 2; + if (fieldClass == byte.class || fieldClass == boolean.class) + return 1; + throw new AssertionError("Unrecognized primitive " + fieldClass.getName()); } - private static Iterable getAllFields(Class clazz, Class topClass) { - List fields = Lists.newArrayListWithCapacity(8); + private static Iterable getAllFields(Class clazz) { + List fields = new ArrayList<>(8); while (clazz != null) { 
fields.addAll(Arrays.asList(clazz.getDeclaredFields())); clazz = clazz.getSuperclass(); - if (clazz == topClass) break; } - //all together so there is only one security check + // all together so there is only one security check AccessibleObject.setAccessible(fields.toArray(new AccessibleObject[fields.size()]), true); return fields; } @@ -334,6 +339,7 @@ public FieldAndType(Field field, FieldType type) { this.field = field; this.type = type; } + public Field field; public FieldType type; } @@ -372,8 +378,7 @@ private void addField(FieldType type, Field field) { fields.add(new FieldAndType(field, type)); } - public int estimate( - Object obj, HashMap, ObjectEstimator> parent) { + public int estimate(Object obj, HashMap, ObjectEstimator> parent) { IdentityHashMap uniqueObjects = new IdentityHashMap<>(); uniqueObjects.put(obj, Boolean.TRUE); return estimate(obj, parent, uniqueObjects); @@ -382,7 +387,8 @@ public int estimate( protected int estimate(Object obj, HashMap, ObjectEstimator> parent, IdentityHashMap uniqueObjects) { // TODO: maybe use stack of est+obj pairs instead of recursion. - if (fields == null) return directSize; + if (fields == null) + return directSize; int referencedSize = 0; for (FieldAndType e : fields) { Object fieldObj; @@ -392,15 +398,17 @@ protected int estimate(Object obj, HashMap, ObjectEstimator> parent, throw new AssertionError("IAE: " + ex.getMessage()); } // reference is already accounted for in the directSize. - if (fieldObj == null) continue; - if (null != uniqueObjects.put(fieldObj, Boolean.TRUE)) continue; + if (fieldObj == null) + continue; + if (null != uniqueObjects.put(fieldObj, Boolean.TRUE)) + continue; switch (e.type) { case COLLECTION: { - Collection c = (Collection)fieldObj; + Collection c = (Collection) fieldObj; ObjectEstimator collEstimator = parent.get(fieldObj.getClass()); if (collEstimator == null) { // We have no estimator for this type... assume low overhead and hope for the best. - LlapIoImpl.LOG.trace("Approximate estimation for collection {} from {}", e.field, + LOG.trace("Approximate estimation for collection {} from {}", e.field, fieldObj.getClass().getName()); referencedSize += memoryModel.object(); referencedSize += estimateCollectionElements(parent, c, e.field, uniqueObjects); @@ -408,43 +416,44 @@ protected int estimate(Object obj, HashMap, ObjectEstimator> parent, } else if (collEstimator instanceof CollectionEstimator) { referencedSize += memoryModel.object(); referencedSize += estimateCollectionElements(parent, c, e.field, uniqueObjects); - referencedSize += ((CollectionEstimator)collEstimator).estimateOverhead(c.size()); + referencedSize += ((CollectionEstimator) collEstimator).estimateOverhead(c.size()); } else { // We decided to treat this collection as regular object. - LlapIoImpl.LOG.trace("Verbose estimation for collection {} from {}", - fieldObj.getClass().getName(), e.field); + LOG.trace("Verbose estimation for collection {} from {}", fieldObj.getClass().getName(), + e.field); referencedSize += collEstimator.estimate(c, parent, uniqueObjects); } break; } case MAP: { - Map m = (Map)fieldObj; + Map m = (Map) fieldObj; ObjectEstimator collEstimator = parent.get(fieldObj.getClass()); if (collEstimator == null) { // We have no estimator for this type... assume low overhead and hope for the best. 
- LlapIoImpl.LOG.trace("Approximate estimation for map {} from {}", - fieldObj.getClass().getName(), e.field); + LOG.trace("Approximate estimation for map {} from {}", fieldObj.getClass().getName(), + e.field); referencedSize += memoryModel.object(); referencedSize += estimateMapElements(parent, m, e.field, uniqueObjects); - referencedSize += memoryModel.array() + m.size() - * (memoryModel.ref() * 2 + memoryModel.object()); + referencedSize += + memoryModel.array() + m.size() * (memoryModel.ref() * 2 + memoryModel.object()); } else if (collEstimator instanceof CollectionEstimator) { referencedSize += memoryModel.object(); referencedSize += estimateMapElements(parent, m, e.field, uniqueObjects); - referencedSize += ((CollectionEstimator)collEstimator).estimateOverhead(m.size()); + referencedSize += ((CollectionEstimator) collEstimator).estimateOverhead(m.size()); } else { // We decided to treat this map as regular object. - LlapIoImpl.LOG.trace("Verbose estimation for map {} from {}", - fieldObj.getClass().getName(), e.field); + LOG.trace("Verbose estimation for map {} from {}", fieldObj.getClass().getName(), + e.field); referencedSize += collEstimator.estimate(m, parent, uniqueObjects); } break; } case OBJECT_ARRAY: { int len = Array.getLength(fieldObj); - referencedSize += JavaDataModel.alignUp( - memoryModel.array() + len * memoryModel.ref(), memoryModel.memoryAlign()); - if (len == 0) continue; + referencedSize += JavaDataModel.alignUp(memoryModel.array() + len * memoryModel.ref(), + memoryModel.memoryAlign()); + if (len == 0) + continue; referencedSize += estimateArrayElements(parent, e, fieldObj, len, uniqueObjects); break; } @@ -463,13 +472,14 @@ protected int estimate(Object obj, HashMap, ObjectEstimator> parent, ObjectEstimator fieldEstimator = parent.get(fieldObj.getClass()); if (fieldEstimator == null) { // TODO: use reflection? - throw new AssertionError("Don't know how to measure " - + fieldObj.getClass().getName() + " from " + e.field); + throw new AssertionError( + "Don't know how to measure " + fieldObj.getClass().getName() + " from " + e.field); } referencedSize += fieldEstimator.estimate(fieldObj, parent, uniqueObjects); break; } - default: throw new AssertionError("Unknown type " + e.type); + default: + throw new AssertionError("Unknown type " + e.type); } } return directSize + referencedSize; @@ -482,16 +492,18 @@ private int estimateArrayElements(HashMap, ObjectEstimator> parent, Fie ObjectEstimator lastEstimator = parent.get(lastClass); for (int i = 0; i < len; ++i) { Object element = Array.get(fieldObj, i); - if (element == null) continue; - if (null != uniqueObjects.put(element, Boolean.TRUE)) continue; + if (element == null) + continue; + if (null != uniqueObjects.put(element, Boolean.TRUE)) + continue; Class elementClass = element.getClass(); if (lastClass != elementClass) { lastClass = elementClass; lastEstimator = parent.get(lastClass); if (lastEstimator == null) { // TODO: use reflection? 
- throw new AssertionError("Don't know how to measure element " - + lastClass.getName() + " from " + e.field); + throw new AssertionError( + "Don't know how to measure element " + lastClass.getName() + " from " + e.field); } } result += lastEstimator.estimate(element, parent, uniqueObjects); @@ -505,16 +517,18 @@ protected int estimateCollectionElements(HashMap, ObjectEstimator> pare Class lastClass = null; int result = 0; for (Object element : c) { - if (element == null) continue; - if (null != uniqueObjects.put(element, Boolean.TRUE)) continue; + if (element == null) + continue; + if (null != uniqueObjects.put(element, Boolean.TRUE)) + continue; Class elementClass = element.getClass(); if (lastClass != elementClass) { lastClass = elementClass; lastEstimator = parent.get(lastClass); if (lastEstimator == null) { // TODO: use reflection? - throw new AssertionError("Don't know how to measure element " - + lastClass.getName() + " from " + field); + throw new AssertionError( + "Don't know how to measure element " + lastClass.getName() + " from " + field); } } result += lastEstimator.estimate(element, parent, uniqueObjects); @@ -522,35 +536,38 @@ protected int estimateCollectionElements(HashMap, ObjectEstimator> pare return result; } - protected int estimateMapElements(HashMap, ObjectEstimator> parent, - Map m, Field field, IdentityHashMap uniqueObjects) { + protected int estimateMapElements(HashMap, ObjectEstimator> parent, Map m, + Field field, IdentityHashMap uniqueObjects) { ObjectEstimator keyEstimator = null, valueEstimator = null; Class lastKeyClass = null, lastValueClass = null; int result = 0; for (Map.Entry element : m.entrySet()) { Object key = element.getKey(), value = element.getValue(); - if (null != uniqueObjects.put(key, Boolean.TRUE)) continue; + if (null != uniqueObjects.put(key, Boolean.TRUE)) + continue; Class keyClass = key.getClass(); if (lastKeyClass != keyClass) { lastKeyClass = keyClass; keyEstimator = parent.get(lastKeyClass); if (keyEstimator == null) { // TODO: use reflection? - throw new AssertionError("Don't know how to measure key " - + lastKeyClass.getName() + " from " + field); + throw new AssertionError( + "Don't know how to measure key " + lastKeyClass.getName() + " from " + field); } } result += keyEstimator.estimate(element, parent, uniqueObjects); - if (value == null) continue; - if (null != uniqueObjects.put(value, Boolean.TRUE)) continue; + if (value == null) + continue; + if (null != uniqueObjects.put(value, Boolean.TRUE)) + continue; Class valueClass = value.getClass(); if (lastValueClass != valueClass) { lastValueClass = valueClass; valueEstimator = parent.get(lastValueClass); if (valueEstimator == null) { // TODO: use reflection? 
- throw new AssertionError("Don't know how to measure value " - + lastValueClass.getName() + " from " + field); + throw new AssertionError( + "Don't know how to measure value " + lastValueClass.getName() + " from " + field); } } result += valueEstimator.estimate(element, parent, uniqueObjects); @@ -572,14 +589,14 @@ public CollectionEstimator(int base, int perElement) { protected int estimate(Object obj, HashMap, ObjectEstimator> parent, IdentityHashMap uniqueObjects) { if (obj instanceof Collection) { - Collection c = (Collection)obj; - int overhead = estimateOverhead(c.size()), elements = estimateCollectionElements( - parent, c, null, uniqueObjects); + Collection c = (Collection) obj; + int overhead = estimateOverhead(c.size()), + elements = estimateCollectionElements(parent, c, null, uniqueObjects); return overhead + elements + memoryModel.object(); } else if (obj instanceof Map) { - Map m = (Map)obj; - int overhead = estimateOverhead(m.size()), elements = estimateMapElements( - parent, m, null, uniqueObjects); + Map m = (Map) obj; + int overhead = estimateOverhead(m.size()), + elements = estimateMapElements(parent, m, null, uniqueObjects); return overhead + elements + memoryModel.object(); } throw new AssertionError(obj.getClass().getName()); @@ -591,13 +608,13 @@ int estimateOverhead(int size) { } public static void addEstimator(String className, - HashMap, ObjectEstimator> sizeEstimators) { + HashMap, ObjectEstimator> sizeEstimators, Class topClass) { Class clazz = null; try { clazz = Class.forName(className); } catch (ClassNotFoundException e) { // Ignore and hope for the best. - LlapIoImpl.LOG.warn("Cannot find " + className); + LOG.warn("Cannot find " + className); return; } IncrementalObjectSizeEstimator.createEstimators(clazz, sizeEstimators);
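
For reviewers, a minimal usage sketch of how the relocated org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator is consumed by the new memory accounting. This is illustrative only and not part of the patch: the class name CacheAdmissionSketch is hypothetical, Partition is just a convenient Thrift type (SharedCache actually sizes wrappers such as PartitionWrapper, TableWrapper and ColumnStatisticsObj), and the budget fields stand in for what SharedCache.initialize(), getMemorySizeEstimator() and isCacheMemoryFull() maintain; the 80% admission threshold mirrors the patch.

import java.util.HashMap;

import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator;
import org.apache.hadoop.hive.ql.util.IncrementalObjectSizeEstimator.ObjectEstimator;

public class CacheAdmissionSketch {
  // Stand-ins for SharedCache's budget fields; the real limit comes from
  // metastore.cached.rawstore.max.cache.memory and the running total of admitted objects.
  private final long maxCacheSizeInBytes;
  private long currentCacheSizeInBytes = 0;
  private final HashMap<Class<?>, ObjectEstimator> sizeEstimators;

  public CacheAdmissionSketch(long maxCacheSizeInBytes) {
    this.maxCacheSizeInBytes = maxCacheSizeInBytes;
    // Reflection work happens once up front, as in SharedCache.initialize().
    this.sizeEstimators = IncrementalObjectSizeEstimator.createEstimators(Partition.class);
  }

  // Mirrors SharedCache.getMemorySizeEstimator(): add estimators lazily for classes
  // that were not reachable when the map was first built.
  private ObjectEstimator estimatorFor(Class<?> clazz) {
    ObjectEstimator estimator = sizeEstimators.get(clazz);
    if (estimator == null) {
      IncrementalObjectSizeEstimator.createEstimators(clazz, sizeEstimators);
      estimator = sizeEstimators.get(clazz);
    }
    return estimator;
  }

  // Admit the object only if the running total plus its estimated size stays under
  // 80% of the configured budget, the same check isCacheMemoryFull() performs.
  public boolean tryAdmit(Partition part) {
    long estimated = estimatorFor(Partition.class).estimate(part, sizeEstimators);
    if (0.8 * maxCacheSizeInBytes < currentCacheSizeInBytes + estimated) {
      return false; // cache considered full; the populate*/update* callers stop adding objects
    }
    currentCacheSizeInBytes += estimated;
    return true;
  }
}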
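
Similarly, a short hedged illustration (not part of the patch, and assuming the patched MetastoreConf is on the classpath) of what the new size parsing accepts for metastore.cached.rawstore.max.cache.memory; the commented results follow directly from parseNumberFollowedByUnit() and multiplierFor().

import org.apache.hadoop.hive.metastore.conf.MetastoreConf;

public class SizeParsingExample {
  public static void main(String[] args) {
    // Units are trimmed and case-insensitive; a blank unit means bytes.
    System.out.println(MetastoreConf.toSizeBytes("1Gb"));   // 1073741824 (the config default)
    System.out.println(MetastoreConf.toSizeBytes("512kb")); // 524288
    System.out.println(MetastoreConf.toSizeBytes("4096"));  // 4096
    System.out.println(MetastoreConf.toSizeBytes("2 mb"));  // 2097152
  }
}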