From b4463e7cdd530ee31383c4ee3b97c92bacdb372c Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Tue, 10 Nov 2015 12:21:32 -0800 Subject: [PATCH] HBASE-14793 Allow limiting size of block into L1 block cache. --- .../regionserver/MetricsRegionServerSource.java | 3 ++ .../regionserver/MetricsRegionServerWrapper.java | 5 +++ .../MetricsRegionServerSourceImpl.java | 3 ++ .../apache/hadoop/hbase/io/hfile/CacheStats.java | 11 +++++ .../hadoop/hbase/io/hfile/LruBlockCache.java | 31 ++++++++++--- .../hadoop/hbase/io/hfile/bucket/BucketCache.java | 9 +--- .../MetricsRegionServerWrapperImpl.java | 5 +++ .../hadoop/hbase/io/hfile/TestLruBlockCache.java | 51 ++++++++++++++++++++-- .../MetricsRegionServerWrapperStub.java | 4 ++ .../regionserver/TestMetricsRegionServer.java | 1 + 10 files changed, 107 insertions(+), 16 deletions(-) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java index 06190c6..ee0217a 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java @@ -227,6 +227,9 @@ public interface MetricsRegionServerSource extends BaseSource { String BLOCK_CACHE_EXPRESS_HIT_PERCENT = "blockCacheExpressHitPercent"; String BLOCK_CACHE_EXPRESS_HIT_PERCENT_DESC = "The percent of the time that requests with the cache turned on hit the cache."; + String BLOCK_CACHE_FAILED_INSERTION_COUNT = "blockCacheFailedInsertionCount"; + String BLOCK_CACHE_FAILED_INSERTION_COUNT_DESC = "Number of times that a block cache " + + "insertion failed. Usually due to size restrictions."; String RS_START_TIME_NAME = "regionServerStartTime"; String ZOOKEEPER_QUORUM_NAME = "zookeeperQuorum"; String SERVER_NAME_NAME = "serverName"; diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java index 7851770..6921222 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java @@ -234,6 +234,11 @@ public interface MetricsRegionServerWrapper { double getBlockCacheHitCachingPercent(); /** + * Number of cache insertions that failed. + */ + long getBlockCacheFailedInsertions(); + + /** * Force a re-computation of the metrics. */ void forceRecompute(); diff --git a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java index a4891da..00a3b8b 100644 --- a/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java +++ b/hbase-hadoop2-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java @@ -35,6 +35,7 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; public class MetricsRegionServerSourceImpl extends BaseSourceImpl implements MetricsRegionServerSource { + final MetricsRegionServerWrapper rsWrap; private final MetricHistogram putHisto; private final MetricHistogram deleteHisto; @@ -250,6 +251,8 @@ public class MetricsRegionServerSourceImpl rsWrap.getBlockCacheHitPercent()) .addGauge(Interns.info(BLOCK_CACHE_EXPRESS_HIT_PERCENT, BLOCK_CACHE_EXPRESS_HIT_PERCENT_DESC), rsWrap.getBlockCacheHitCachingPercent()) + .addCounter(Interns.info(BLOCK_CACHE_FAILED_INSERTION_COUNT, + BLOCK_CACHE_FAILED_INSERTION_COUNT_DESC),rsWrap.getBlockCacheFailedInsertions()) .addCounter(Interns.info(UPDATES_BLOCKED_TIME, UPDATES_BLOCKED_DESC), rsWrap.getUpdatesBlockedTime()) .addCounter(Interns.info(FLUSHED_CELLS, FLUSHED_CELLS_DESC), diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java index 9301de2..2dae66f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java @@ -74,6 +74,9 @@ public class CacheStats { /** The total number of blocks for primary replica that have been evicted */ private final AtomicLong primaryEvictedBlockCount = new AtomicLong(0); + /** The total number of blocks that were not inserted. */ + private final AtomicLong failedInserts = new AtomicLong(0); + /** The number of metrics periods to include in window */ private final int numPeriodsInWindow; /** Hit counts for each period in window */ @@ -154,6 +157,10 @@ public class CacheStats { } } + public long failInsert() { + return failedInserts.incrementAndGet(); + } + public long getRequestCount() { return getHitCount() + getMissCount(); } @@ -218,6 +225,10 @@ public class CacheStats { return ((float)getEvictedCount()/(float)getEvictionCount()); } + public long getFailedInserts() { + return failedInserts.get(); + } + public void rollMetricsPeriod() { hitCounts[windowIndex] = getHitCount() - lastHitCount; lastHitCount = getHitCount(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java index 04983f6..68ce16c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java @@ -140,12 +140,15 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { /** Statistics thread */ static final int statThreadPeriod = 60 * 5; + private static final String LRU_MAX_BLOCK_SIZE = "hbase.lru.max.block.size"; + private static final long DEFAULT_MAX_BLOCK_SIZE = 16L * 1024L * 1024L; /** Concurrent map (the cache) */ private final Map map; /** Eviction lock (locked when eviction in process) */ private final ReentrantLock evictionLock = new ReentrantLock(true); + private final long maxBlockSize; /** Volatile boolean to track if we are in an eviction process or not */ private volatile boolean evictionInProgress = false; @@ -223,7 +226,8 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { DEFAULT_SINGLE_FACTOR, DEFAULT_MULTI_FACTOR, DEFAULT_MEMORY_FACTOR, - false + false, + DEFAULT_MAX_BLOCK_SIZE ); } @@ -237,7 +241,8 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { conf.getFloat(LRU_SINGLE_PERCENTAGE_CONFIG_NAME, DEFAULT_SINGLE_FACTOR), conf.getFloat(LRU_MULTI_PERCENTAGE_CONFIG_NAME, DEFAULT_MULTI_FACTOR), conf.getFloat(LRU_MEMORY_PERCENTAGE_CONFIG_NAME, DEFAULT_MEMORY_FACTOR), - conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE) + conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE), + conf.getLong(LRU_MAX_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE) ); } @@ -262,7 +267,8 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, int mapInitialSize, float mapLoadFactor, int mapConcurrencyLevel, float minFactor, float acceptableFactor, float singleFactor, - float multiFactor, float memoryFactor, boolean forceInMemory) { + float multiFactor, float memoryFactor, boolean forceInMemory, long maxBlockSize) { + this.maxBlockSize = maxBlockSize; if(singleFactor + multiFactor + memoryFactor != 1 || singleFactor < 0 || multiFactor < 0 || memoryFactor < 0) { throw new IllegalArgumentException("Single, multi, and memory factors " + @@ -324,6 +330,21 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { @Override public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory, final boolean cacheDataInL1) { + + if (buf.heapSize() > maxBlockSize) { + // If there are a lot of blocks that are too + // big this can make the logs way too noisy. + // So we log 2% + if (stats.failInsert() % 50 == 0) { + LOG.warn("Trying to cache too large a block " + + cacheKey.getHfileName() + " @ " + + cacheKey.getOffset() + + " is " + buf.heapSize() + + " which is larger than " + maxBlockSize); + } + return; + } + LruCachedBlock cb = map.get(cacheKey); if (cb != null) { // compare the contents, if they are not equal, we are in big trouble @@ -881,8 +902,8 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { } public final static long CACHE_FIXED_OVERHEAD = ClassSize.align( - (3 * Bytes.SIZEOF_LONG) + (9 * ClassSize.REFERENCE) + - (5 * Bytes.SIZEOF_FLOAT) + Bytes.SIZEOF_BOOLEAN + (3 * Bytes.SIZEOF_LONG) + (10 * ClassSize.REFERENCE) + + (5 * Bytes.SIZEOF_FLOAT) + (2 * Bytes.SIZEOF_BOOLEAN) + ClassSize.OBJECT); @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index 718e92a..c990ef4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -153,7 +153,6 @@ public class BucketCache implements BlockCache, HeapSize { private final AtomicLong heapSize = new AtomicLong(0); /** Current number of cached elements */ private final AtomicLong blockNumber = new AtomicLong(0); - private final AtomicLong failedBlockAdditions = new AtomicLong(0); /** Cache access count (sequential ID) */ private final AtomicLong accessCount = new AtomicLong(0); @@ -377,7 +376,7 @@ public class BucketCache implements BlockCache, HeapSize { } if (!successfulAddition) { ramCache.remove(cacheKey); - failedBlockAdditions.incrementAndGet(); + cacheStats.failInsert(); } else { this.blockNumber.incrementAndGet(); this.heapSize.addAndGet(cachedItem.heapSize()); @@ -574,7 +573,7 @@ public class BucketCache implements BlockCache, HeapSize { long usedSize = bucketAllocator.getUsedSize(); long freeSize = totalSize - usedSize; long cacheSize = getRealCacheSize(); - LOG.info("failedBlockAdditions=" + getFailedBlockAdditions() + ", " + + LOG.info("failedBlockAdditions=" + cacheStats.getFailedInserts() + ", " + "totalSize=" + StringUtils.byteDesc(totalSize) + ", " + "freeSize=" + StringUtils.byteDesc(freeSize) + ", " + "usedSize=" + StringUtils.byteDesc(usedSize) +", " + @@ -595,10 +594,6 @@ public class BucketCache implements BlockCache, HeapSize { cacheStats.reset(); } - public long getFailedBlockAdditions() { - return this.failedBlockAdditions.get(); - } - public long getRealCacheSize() { return this.realCacheSize.get(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java index b206749..e908be6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java @@ -351,6 +351,11 @@ class MetricsRegionServerWrapperImpl return (ratio * 100); } + @Override + public long getBlockCacheFailedInsertions() { + return this.cacheStats.getFailedInserts(); + } + @Override public void forceRecompute() { this.runnable.run(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java index 9a548f5..4c0f98f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestLruBlockCache.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hbase.io.hfile; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import java.nio.ByteBuffer; @@ -266,7 +268,8 @@ public class TestLruBlockCache { 0.33f, // single 0.33f, // multi 0.34f, // memory - false); + false, + 16 * 1024 * 1024); CachedItem [] singleBlocks = generateFixedBlocks(5, blockSize, "single"); CachedItem [] multiBlocks = generateFixedBlocks(5, blockSize, "multi"); @@ -386,7 +389,8 @@ public class TestLruBlockCache { 0.2f, // single 0.3f, // multi 0.5f, // memory - true); + true, + 16 * 1024 * 1024); CachedItem [] singleBlocks = generateFixedBlocks(10, blockSize, "single"); CachedItem [] multiBlocks = generateFixedBlocks(10, blockSize, "multi"); @@ -491,7 +495,8 @@ public class TestLruBlockCache { 0.33f, // single 0.33f, // multi 0.34f, // memory - false); + false, + 16 * 1024 * 1024); CachedItem [] singleBlocks = generateFixedBlocks(20, blockSize, "single"); CachedItem [] multiBlocks = generateFixedBlocks(5, blockSize, "multi"); @@ -539,6 +544,43 @@ public class TestLruBlockCache { } + @Test + public void testMaxBlockSize() throws Exception { + long maxSize = 100000; + long blockSize = calculateBlockSize(maxSize, 10); + + LruBlockCache cache = new LruBlockCache(maxSize, blockSize, false, + (int)Math.ceil(1.2*maxSize/blockSize), + LruBlockCache.DEFAULT_LOAD_FACTOR, + LruBlockCache.DEFAULT_CONCURRENCY_LEVEL, + 0.66f, // min + 0.99f, // acceptable + 0.33f, // single + 0.33f, // multi + 0.34f, // memory + false, + 1024); + CachedItem [] tooLong = generateFixedBlocks(10, 1024+5, "long"); + CachedItem [] small = generateFixedBlocks(15, 600, "small"); + + + for (CachedItem i:tooLong) { + cache.cacheBlock(i.cacheKey, i); + } + for (CachedItem i:small) { + cache.cacheBlock(i.cacheKey, i); + } + assertEquals(15,cache.getBlockCount()); + for (CachedItem i:small) { + assertNotNull(cache.getBlock(i.cacheKey, true, false, false)); + } + for (CachedItem i:tooLong) { + assertNull(cache.getBlock(i.cacheKey, true, false, false)); + } + + assertEquals(10, cache.getStats().getFailedInserts()); + } + // test setMaxSize @Test public void testResizeBlockCache() throws Exception { @@ -555,7 +597,8 @@ public class TestLruBlockCache { 0.33f, // single 0.33f, // multi 0.34f, // memory - false); + false, + 16 * 1024 * 1024); CachedItem [] singleBlocks = generateFixedBlocks(10, blockSize, "single"); CachedItem [] multiBlocks = generateFixedBlocks(10, blockSize, "multi"); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java index 1c3e8bf..f450f01 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java @@ -210,6 +210,10 @@ public class MetricsRegionServerWrapperStub implements MetricsRegionServerWrappe return 97; } + @Override + public long getBlockCacheFailedInsertions() { + return 36; + } @Override public long getUpdatesBlockedTime() { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java index ffaae0c..f3ce0bd 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java @@ -87,6 +87,7 @@ public class TestMetricsRegionServer { HELPER.assertCounter("blockCacheEvictionCount", 418, serverSource); HELPER.assertGauge("blockCacheCountHitPercent", 98, serverSource); HELPER.assertGauge("blockCacheExpressHitPercent", 97, serverSource); + HELPER.assertCounter("blockCacheFailedInsertionCount", 36, serverSource); HELPER.assertCounter("updatesBlockedTime", 419, serverSource); } -- 2.6.1