Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java	(revision 1213994)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV1.java	(working copy)
@@ -30,6 +30,7 @@
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
 import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
@@ -212,6 +213,12 @@
 
     String cacheKey = HFile.getBlockCacheKey(name, offset);
 
+    BlockCategory effectiveCategory = BlockCategory.META;
+    if (metaBlockName.equals(HFileWriterV1.BLOOM_FILTER_META_KEY) ||
+        metaBlockName.equals(HFileWriterV1.BLOOM_FILTER_DATA_KEY)) {
+      effectiveCategory = BlockCategory.BLOOM;
+    }
+
     // Per meta key from any given file, synchronize reads for said block
     synchronized (metaBlockIndexReader.getRootBlockKey(block)) {
       metaLoads.incrementAndGet();
@@ -219,7 +226,7 @@
       if (cacheConf.isBlockCacheEnabled()) {
         HFileBlock cachedBlock =
           (HFileBlock) cacheConf.getBlockCache().getBlock(cacheKey,
-              cacheConf.shouldCacheDataOnRead());
+              cacheConf.shouldCacheBlockOnRead(effectiveCategory));
         if (cachedBlock != null) {
           cacheHits.incrementAndGet();
           return cachedBlock.getBufferWithoutHeader();
@@ -236,7 +243,7 @@
       HFile.readOps.incrementAndGet();
 
       // Cache the block
-      if (cacheConf.shouldCacheDataOnRead() && cacheBlock) {
+      if (cacheBlock && cacheConf.shouldCacheBlockOnRead(effectiveCategory)) {
         cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock,
             cacheConf.isInMemory());
       }
@@ -310,7 +317,8 @@
       HFile.readOps.incrementAndGet();
 
       // Cache the block
-      if (cacheConf.shouldCacheDataOnRead() && cacheBlock) {
+      if (cacheBlock && cacheConf.shouldCacheBlockOnRead(
+          hfileBlock.getBlockType().getCategory())) {
         cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock,
             cacheConf.isInMemory());
       }
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java	(revision 1213994)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java	(working copy)
@@ -19,9 +19,7 @@
  */
 package org.apache.hadoop.hbase.io.hfile;
 
-import java.io.ByteArrayInputStream;
 import java.io.DataInput;
-import java.io.DataInputStream;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -32,6 +30,7 @@
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.IdLock;
@@ -229,7 +228,7 @@
    */
   @Override
   public HFileBlock readBlock(long dataBlockOffset, long onDiskBlockSize,
-      boolean cacheBlock, boolean pread, final boolean isCompaction)
+      final boolean cacheBlock, boolean pread, final boolean isCompaction)
       throws IOException {
     if (dataBlockIndexReader == null) {
       throw new IOException("Block index not loaded");
@@ -251,8 +250,6 @@
     try {
       blockLoads.incrementAndGet();
 
-      // Check cache for block. If found return.
-      cacheBlock &= cacheConf.shouldCacheDataOnRead();
       if (cacheConf.isBlockCacheEnabled()) {
         HFileBlock cachedBlock = (HFileBlock)
             cacheConf.getBlockCache().getBlock(cacheKey, cacheBlock);
@@ -266,19 +263,20 @@
 
       // Load block from filesystem.
       long startTimeNs = System.nanoTime();
-      HFileBlock dataBlock = fsBlockReader.readBlockData(dataBlockOffset,
+      HFileBlock hfileBlock = fsBlockReader.readBlockData(dataBlockOffset,
           onDiskBlockSize, -1, pread);
+      BlockCategory blockCategory = hfileBlock.getBlockType().getCategory();
 
       HFile.readTimeNano.addAndGet(System.nanoTime() - startTimeNs);
       HFile.readOps.incrementAndGet();
 
       // Cache the block
-      if (cacheBlock) {
-        cacheConf.getBlockCache().cacheBlock(cacheKey, dataBlock,
+      if (cacheBlock && cacheConf.shouldCacheBlockOnRead(blockCategory)) {
+        cacheConf.getBlockCache().cacheBlock(cacheKey, hfileBlock,
             cacheConf.isInMemory());
       }
 
-      return dataBlock;
+      return hfileBlock;
     } finally {
       offsetLock.releaseLockEntry(lockEntry);
     }
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java	(revision 1213994)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/CacheConfig.java	(working copy)
@@ -28,6 +28,7 @@
 import org.apache.hadoop.hbase.regionserver.StoreFile;
 import org.apache.hadoop.hbase.util.DirectMemoryUtils;
 import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
 
 /**
  * Stores all of the cache objects and configuration for a single HFile.
@@ -208,6 +209,19 @@
   }
 
   /**
+   * Should we cache a block of a particular category? We always cache
+   * important blocks such as index and Bloom filter blocks, as long as
+   * the block cache is available.
+   */
+  public boolean shouldCacheBlockOnRead(BlockCategory category) {
+    boolean shouldCache = isBlockCacheEnabled()
+        && (cacheDataOnRead ||
+            category == BlockCategory.INDEX ||
+            category == BlockCategory.BLOOM);
+    return shouldCache;
+  }
+
+  /**
    * @return true if blocks in this file should be flagged as in-memory
    */
   public boolean isInMemory() {
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java	(revision 1213994)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/BlockType.java	(working copy)
@@ -37,49 +37,55 @@
 
   // Scanned block section
 
   /** Data block, both versions */
-  DATA("DATABLK*"),
+  DATA("DATABLK*", BlockCategory.DATA),
 
   /** Version 2 leaf index block. Appears in the data block section */
-  LEAF_INDEX("IDXLEAF2"),
+  LEAF_INDEX("IDXLEAF2", BlockCategory.INDEX),
 
   /** Bloom filter block, version 2 */
-  BLOOM_CHUNK("BLMFBLK2"),
+  BLOOM_CHUNK("BLMFBLK2", BlockCategory.BLOOM),
 
   // Non-scanned block section
 
   /** Meta blocks */
-  META("METABLKc"),
+  META("METABLKc", BlockCategory.META),
 
   /** Intermediate-level version 2 index in the non-data block section */
-  INTERMEDIATE_INDEX("IDXINTE2"),
+  INTERMEDIATE_INDEX("IDXINTE2", BlockCategory.INDEX),
 
   // Load-on-open section.
 
   /** Root index block, also used for the single-level meta index, version 2 */
-  ROOT_INDEX("IDXROOT2"),
+  ROOT_INDEX("IDXROOT2", BlockCategory.INDEX),
 
   /** File info, version 2 */
-  FILE_INFO("FILEINF2"),
+  FILE_INFO("FILEINF2", BlockCategory.META),
 
   /** Bloom filter metadata, version 2 */
-  BLOOM_META("BLMFMET2"),
+  BLOOM_META("BLMFMET2", BlockCategory.BLOOM),
 
   // Trailer
 
   /** Fixed file trailer, both versions (always just a magic string) */
-  TRAILER("TRABLK\"$"),
+  TRAILER("TRABLK\"$", BlockCategory.META),
 
   // Legacy blocks
 
   /** Block index magic string in version 1 */
-  INDEX_V1("IDXBLK)+");
+  INDEX_V1("IDXBLK)+", BlockCategory.INDEX);
 
+  public enum BlockCategory {
+    DATA, META, INDEX, BLOOM
+  }
+
   public static final int MAGIC_LENGTH = 8;
 
   private final byte[] magic;
+  private final BlockCategory metricCat;
 
-  private BlockType(String magicStr) {
+  private BlockType(String magicStr, BlockCategory metricCat) {
     magic = Bytes.toBytes(magicStr);
+    this.metricCat = metricCat;
     assert magic.length == MAGIC_LENGTH;
   }
@@ -95,6 +101,14 @@
     buf.put(magic);
   }
 
+  public String getMetricName() {
+    return metricCat.toString();
+  }
+
+  public BlockCategory getCategory() {
+    return metricCat;
+  }
+
   public static BlockType parse(byte[] buf, int offset, int length)
       throws IOException {
     if (length != MAGIC_LENGTH) {
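
A quick sanity check of the caching decision this patch introduces: with the block cache enabled but data-on-read caching turned off, INDEX and BLOOM blocks should still be cached, while DATA and META blocks should not. The stand-alone class below is a hypothetical sketch, not part of the patch: ShouldCacheSketch, its boolean parameters, and main() are illustration-only stand-ins for the CacheConfig state; only the category logic mirrors CacheConfig.shouldCacheBlockOnRead().

    // Hypothetical sketch of the category-aware caching decision.
    // BlockCategory mirrors the enum added to BlockType; blockCacheEnabled
    // and cacheDataOnRead stand in for the corresponding CacheConfig state.
    public class ShouldCacheSketch {
      enum BlockCategory { DATA, META, INDEX, BLOOM }

      static boolean shouldCacheBlockOnRead(boolean blockCacheEnabled,
          boolean cacheDataOnRead, BlockCategory category) {
        // Index and Bloom filter blocks are cached whenever the block cache
        // is available; DATA and META additionally require cacheDataOnRead.
        return blockCacheEnabled
            && (cacheDataOnRead
                || category == BlockCategory.INDEX
                || category == BlockCategory.BLOOM);
      }

      public static void main(String[] args) {
        // Block cache on, cacheDataOnRead off:
        for (BlockCategory c : BlockCategory.values()) {
          System.out.println(c + " -> " + shouldCacheBlockOnRead(true, false, c));
        }
        // Prints: DATA -> false, META -> false, INDEX -> true, BLOOM -> true
      }
    }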