diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 22bffee..666b357 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -63,8 +63,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
   @Override
   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory,
       final boolean cacheDataInL1) {
-    boolean isMetaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
-    if (isMetaBlock || cacheDataInL1) {
+    boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
+    if (metaBlock || cacheDataInL1) {
       lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1);
     } else {
       l2Cache.cacheBlock(cacheKey, buf, inMemory, false);
@@ -81,12 +81,9 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
       boolean repeat, boolean updateCacheMetrics) {
     // TODO: is there a hole here, or just awkwardness since in the lruCache getBlock
     // we end up calling l2Cache.getBlock.
-    if (lruCache.containsBlock(cacheKey)) {
-      return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
-    }
-    Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
-
-    return result;
+    return lruCache.containsBlock(cacheKey)?
+        lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics):
+        l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
   }
 
   @Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
index 86f2fbc..26272c1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ExplicitColumnTracker.java
@@ -33,7 +33,7 @@ import org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode;
  * of versions during the course of a Get or Scan operation, when explicit
  * column qualifiers have been asked for in the query.
  *
- * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher 
+ * With a little magic (see {@link ScanQueryMatcher}), we can use this matcher
  * for both scans and gets. The main difference is 'next' and 'done' collapse
  * for the scan case (since we see all columns in order), and we only reset
  * between rows.
@@ -92,8 +92,8 @@ public class ExplicitColumnTracker implements ColumnTracker {
     reset();
   }
 
-  /** 
-   * Done when there are no more columns to match against.
+  /**
+   * @return True when there are no more columns to match against.
    */
   public boolean done() {
     return this.index >= columns.length;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 406850e..4b819cb 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -5681,7 +5681,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
 
   /**
    * Fetches records with currentRow into results list, until next row, batchLimit (if not -1) is
-   * reached, or remainingResultSize (if not -1) is reaced
+   * reached, or remainingResultSize (if not -1) is reached
    * @param heap KeyValueHeap to fetch data from.It must be positioned on correct row before call.
    * @param scannerContext
    * @param currentRowCell
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
index c220b5c..0fe990d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
@@ -146,6 +146,11 @@ public class ScanQueryMatcher {
   private final boolean isReversed;
 
   /**
+   * True if we are doing a Get Scan (every Get is a Scan).
+   */
+  private final boolean getScan;
+
+  /**
    * Construct a QueryMatcher for a scan
    * @param scan
    * @param scanInfo The store's immutable scan info
@@ -166,6 +171,7 @@ public class ScanQueryMatcher {
     } else {
       this.tr = timeRange;
     }
+    this.getScan = scan.isGetScan();
     this.rowComparator = scanInfo.getComparator();
     this.regionCoprocessorHost = regionCoprocessorHost;
     this.deletes = instantiateDeleteTracker();
@@ -280,7 +286,7 @@ public class ScanQueryMatcher {
    * caused by a data corruption.
    */
   public MatchCode match(Cell cell) throws IOException {
-    if (filter != null && filter.filterAllRemaining()) { 
+    if (filter != null && filter.filterAllRemaining()) {
       return MatchCode.DONE_SCAN;
     }
     if (curCell != null) {
@@ -511,7 +517,10 @@ public class ScanQueryMatcher {
         return true;
       }
     }
-    if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
+    // If a 'get' Scan -- we are doing a Get (every Get is a Scan), then we are looking at one
+    // row only, the one specified in the Get coordinate.
+    if (this.getScan) return false;
+    if (!Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) &&
         rowComparator.compareRows(kv, stopRow, 0, stopRow.length) >= 0) {
       // KV >= STOPROW
       // then NO there is nothing left.
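For context on the ScanQueryMatcher change above: a Get is run internally as a Scan over a single row (its start row equals its stop row), which is what Scan#isGetScan() checks, and that is the value the new getScan field caches. When it is true, the matcher can report that nothing follows the row of the Get, so the scanner can stop without seeking onward. The snippet below is only an illustration of that relationship; the class name and row value are made up and are not part of this patch.

import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class GetScanIllustration {
  public static void main(String[] args) {
    // A Get is executed as a Scan whose start row equals its stop row.
    Get get = new Get(Bytes.toBytes("row1"));
    Scan scan = new Scan(get);
    // isGetScan() is what seeds the new ScanQueryMatcher.getScan field; when it is
    // true, the matcher answers that no rows can follow the current one, so the
    // scanner does not have to compare against the stop row or seek further.
    System.out.println(scan.isGetScan()); // prints true
  }
}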
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBackedByBucketCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBackedByBucketCache.java
index 5c2e7d6..2015f81 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBackedByBucketCache.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBackedByBucketCache.java
@@ -35,12 +35,16 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.CategoryBasedTimeout;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
+import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.compress.Compression;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketAllocator;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
+import org.apache.hadoop.hbase.regionserver.BloomType;
+import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.regionserver.StoreFileScanner;
 import org.apache.hadoop.hbase.testclassification.IOTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.junit.Before;
@@ -57,11 +61,10 @@ import org.junit.rules.TestRule;
 public class TestHFileBackedByBucketCache {
   private static final Log LOG = LogFactory.getLog(TestHFileBackedByBucketCache.class);
   @Rule public TestName name = new TestName();
-  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()).
-      withLookingForStuckThread(true).build();
+  // @Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()).
+  //     withLookingForStuckThread(true).build();
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private static final int ROW_LENGTH = 4;
-  private Configuration conf;
   private FileSystem fs;
 
   // MATH! SIZING FOR THE TEST!
@@ -99,14 +102,15 @@ public class TestHFileBackedByBucketCache {
   public void before() throws IOException {
     // Do setup of a bucketcache that has one bucket only. Enable trace-level logging for
     // key classes.
-    this.conf = TEST_UTIL.getConfiguration();
-    this.fs = FileSystem.get(conf);
+    this.fs = FileSystem.get(TEST_UTIL.getConfiguration());
     // Set BucketCache and HFileBlock to log at trace level.
     setTraceLevel(BucketCache.class);
     setTraceLevel(HFileBlock.class);
     setTraceLevel(HFileReaderImpl.class);
     setTraceLevel(BucketAllocator.class);
 
+    // Clear out any cache between runs.
+    CacheConfig.GLOBAL_BLOCK_CACHE_INSTANCE = null;
   }
 
   // Assumes log4j logging.
@@ -116,6 +120,49 @@ public class TestHFileBackedByBucketCache {
     setLevel(org.apache.log4j.Level.TRACE);
   }
 
+  private static String getBucketCacheDataFile() {
+    String bucketCacheDataFile =
+        (new Path(TEST_UTIL.getDataTestDir(), "bucketcache.data")).toString();
+    // Create parent dirs
+    (new File(bucketCacheDataFile)).getParentFile().mkdirs();
+    return bucketCacheDataFile;
+  }
+
+  private static CacheConfig setupCacheConfig(final Configuration conf,
+      final String bucketCacheDataFile) {
+    conf.set("hbase.bucketcache.ioengine", "file:" + bucketCacheDataFile);
+    conf.set("hbase.bucketcache.persistent.path", bucketCacheDataFile + ".map");
+    conf.setStrings("hbase.bucketcache.bucket.sizes", Integer.toString(BUCKETSIZE));
+    // This is minimum bucketcache size.... 1MB.
+    conf.setInt("hbase.bucketcache.size", 1);
+    return new CacheConfig(conf);
+  }
+
+  /**
+   * Test at the StoreFile level. This is the level at which we do blooms. Blooms should be in
+   * L1 by default, not in L2.
+   */
+  @Test
+  public void testBucketCacheBehindStoreFile() throws IOException {
+    // See above 'MATH! SIZING FOR THE TEST!' note on how we have it so only one bucket in our cache
+    String bucketCacheDataFile = getBucketCacheDataFile();
+    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+    CacheConfig cacheConfig = setupCacheConfig(conf, bucketCacheDataFile);
+
+    // Write 8 entries which should make for four hfileBlocks.
+    final int count = 8;
+    final int hfileBlockCount = 4;
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), this.name.getMethodName());
+    List<Cell> writtenCells =
+        writeStoreFile(conf, hfilePath, Compression.Algorithm.NONE, cacheConfig, count);
+    CacheStats stats = cacheConfig.getBlockCache().getStats();
+    List<Cell> readCells = readStoreFile(conf, hfilePath, cacheConfig);
+    assertTrue(!writtenCells.isEmpty());
+    assertEquals(writtenCells.size(), readCells.size());
+    assertEquals(hfileBlockCount, stats.getMissCount());
+    assertEquals(1, stats.getHitCount()); // readStoreFile re-reads the first block so it comes from cache.
+  }
+
   /**
    * Test that bucketcache is caching and that the persist of in-memory map works
    * @throws IOException
@@ -126,22 +173,18 @@ public class TestHFileBackedByBucketCache {
     // hbase.bucketcache.persistent.path value to store the in-memory map of what is out in
     // the file-backed bucketcache. Set bucketcache to have one size only, BUCKETSIZE.
     // See "MATH! SIZING FOR THE TEST!" note above around declaration of BUCKETSIZE
-    String bucketCacheDataFile =
-        (new Path(TEST_UTIL.getDataTestDir(), "bucketcache.data")).toString();
-    (new File(bucketCacheDataFile)).getParentFile().mkdirs();
-    this.conf.set("hbase.bucketcache.ioengine", "file:" + bucketCacheDataFile);
-    this.conf.set("hbase.bucketcache.persistent.path", bucketCacheDataFile + ".map");
-    this.conf.setStrings("hbase.bucketcache.bucket.sizes", Integer.toString(BUCKETSIZE));
-    // This is minimum bucketcache size.... 1MB.
-    this.conf.setInt("hbase.bucketcache.size", 1);
-    CacheConfig cacheConfig = new CacheConfig(conf);
-    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), this.name.getMethodName());
-
+    String bucketCacheDataFile = getBucketCacheDataFile();
+    Configuration conf = new Configuration(TEST_UTIL.getConfiguration());
+    CacheConfig cacheConfig = setupCacheConfig(conf, bucketCacheDataFile);
+
+    // Write 8 entries which should make for four hfileBlocks.
     final int count = 8;
     final int hfileBlockCount = 4;
-    List<Cell> writtenCells = writeFile(hfilePath, Compression.Algorithm.NONE, cacheConfig, count);
+    Path hfilePath = new Path(TEST_UTIL.getDataTestDir(), this.name.getMethodName());
+    List<Cell> writtenCells =
+        writeHFile(conf, hfilePath, Compression.Algorithm.NONE, cacheConfig, count);
     CacheStats stats = cacheConfig.getBlockCache().getStats();
-    List<Cell> readCells = readFile(hfilePath, cacheConfig);
+    List<Cell> readCells = readHFile(conf, hfilePath, cacheConfig);
     assertTrue(!writtenCells.isEmpty());
     assertEquals(writtenCells.size(), readCells.size());
     assertEquals(hfileBlockCount, stats.getMissCount());
@@ -156,7 +199,7 @@ public class TestHFileBackedByBucketCache {
     cacheConfig = new CacheConfig(conf);
     stats = cacheConfig.getBlockCache().getStats();
     assertEquals(0, stats.getHitCachingCount());
-    readCells = readFile(hfilePath, cacheConfig);
+    readCells = readHFile(conf, hfilePath, cacheConfig);
     // readFile will read all hfileblocs in the file, hfileBlockCount, and then one more, so + 1.
     assertEquals(hfileBlockCount + 1, stats.getHitCachingCount());
   }
@@ -166,7 +209,66 @@ public class TestHFileBackedByBucketCache {
    * @return The Cells written to the file.
    * @throws IOException
    */
-  private List<Cell> writeFile(final Path hfilePath, final Compression.Algorithm compressAlgo,
+  private List<Cell> writeStoreFile(final Configuration conf, final Path hfilePath,
+      final Compression.Algorithm compressAlgo,
+      final CacheConfig cacheConfig, final int count)
+  throws IOException {
+    List<Cell> cells = new ArrayList<Cell>(count);
+    HFileContext hfileContext = new HFileContext();
+    StoreFile.Writer writer = new StoreFile.WriterBuilder(conf, cacheConfig, fs).
+        withBloomType(BloomType.ROWCOL).withFilePath(hfilePath).withFileContext(hfileContext).
+        build();
+    try {
+      byte [] valueBytes = new byte [VALUE_SIZE];
+      for (int i = 0; i < valueBytes.length; i++) valueBytes[i] = '0';
+      for (int i = 0; i < count; ++i) {
+        byte[] keyBytes = format(i);
+        KeyValue keyValue = new KeyValue(keyBytes, HConstants.CATALOG_FAMILY, keyBytes,
+            HConstants.LATEST_TIMESTAMP, valueBytes);
+        writer.append(keyValue);
+        cells.add(keyValue);
+      }
+    } finally {
+      writer.close();
+    }
+    return cells;
+  }
+
+  /**
+   * Read the whole file, then read the first block so we get something from cache for sure.
+   * So... there are TOTAL_BLOCKS_IN_FILE read + 1. See math at head of this class.
+   * @return The Cells read from the file.
+   */
+  private List<Cell> readStoreFile(final Configuration conf, final Path hfilePath,
+      final CacheConfig cacheConfig)
+  throws IOException {
+    List<Cell> cells = new ArrayList<Cell>();
+    StoreFile.Reader reader = new StoreFile.Reader(this.fs, hfilePath, cacheConfig, conf);
+    StoreFileScanner scanner = reader.getStoreFileScanner(true, true);
+    try {
+      scanner.seek(CellUtil.createCell(HConstants.EMPTY_BYTE_ARRAY));
+      Cell cell = null;
+      while ((cell = scanner.next()) != null) {
+        cells.add(cell);
+        LOG.info(cell);
+      }
+      // Do a random seek just so we see a block coming from cache.
+      scanner.seek(reader.getFirstKey());
+      LOG.info(scanner.next());
+    } finally {
+      scanner.close();
+      reader.close(false);
+    }
+    return cells;
+  }
+
+  /**
+   * Write a file with count entries.
+   * @return The Cells written to the file.
+   * @throws IOException
+   */
+  private List<Cell> writeHFile(final Configuration conf, final Path hfilePath,
+      final Compression.Algorithm compressAlgo,
       final CacheConfig cacheConfig, final int count)
   throws IOException {
     List<Cell> cells = new ArrayList<Cell>(count);
@@ -195,10 +297,11 @@ public class TestHFileBackedByBucketCache {
    * So... there are TOTAL_BLOCKS_IN_FILE read + 1. See math at head of this class.
    * @return The Cells read from the file.
    */
-  private List<Cell> readFile(final Path hfilePath, final CacheConfig cacheConfig)
+  private List<Cell> readHFile(final Configuration conf, final Path hfilePath,
+      final CacheConfig cacheConfig)
   throws IOException {
     List<Cell> cells = new ArrayList<Cell>();
-    try (HFile.Reader reader = HFile.createReader(this.fs, hfilePath, cacheConfig, this.conf);
+    try (HFile.Reader reader = HFile.createReader(this.fs, hfilePath, cacheConfig, conf);
         HFileScanner scanner = reader.getScanner(true, true)) {
       scanner.seekTo();
       do {
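For anyone reproducing the test setup outside this class, the sketch below pulls together the configuration that the test's setupCacheConfig() helper applies before constructing a CacheConfig. It is a minimal sketch, not part of the patch: the class name, file path parameter, and bucket size are illustrative placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;

public class FileBackedBucketCacheConfigSketch {
  public static CacheConfig fileBackedBucketCache(String bucketCacheDataFile, int bucketSize) {
    Configuration conf = HBaseConfiguration.create();
    // Use a file on disk as the bucketcache IO engine instead of offheap memory.
    conf.set("hbase.bucketcache.ioengine", "file:" + bucketCacheDataFile);
    // Where the in-memory map of cached blocks gets persisted so it survives restart.
    conf.set("hbase.bucketcache.persistent.path", bucketCacheDataFile + ".map");
    // A single bucket size, as in the test, keeps the sizing math simple.
    conf.setStrings("hbase.bucketcache.bucket.sizes", Integer.toString(bucketSize));
    // 1MB is the minimum bucketcache size.
    conf.setInt("hbase.bucketcache.size", 1);
    return new CacheConfig(conf);
  }
}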