.../org/apache/hadoop/hbase/client/HTable.java | 6 +- .../client/RpcRetryingCallerWithReadReplicas.java | 2 +- .../apache/hadoop/hbase/ipc/AbstractRpcClient.java | 4 +- .../apache/hadoop/hbase/ipc/AsyncRpcChannel.java | 1 + .../apache/hadoop/hbase/ipc/AsyncRpcClient.java | 4 +- .../java/org/apache/hadoop/hbase/ipc/IPCUtil.java | 103 +- .../hbase/ipc/PayloadCarryingRpcController.java | 72 ++ .../org/apache/hadoop/hbase/ipc/RpcClientImpl.java | 1 + .../org/apache/hadoop/hbase/ipc/TestIPCUtil.java | 42 +- .../hadoop/hbase/SharedMemoryBackedCell.java | 37 + .../hadoop/hbase/io/ByteBufferOutputStream.java | 18 +- .../apache/hadoop/hbase/io/hfile/HFileContext.java | 22 +- .../hadoop/hbase/io/hfile/HFileContextBuilder.java | 9 +- .../hadoop/hbase/io/HalfStoreFileReader.java | 11 + .../apache/hadoop/hbase/io/hfile/BlockCache.java | 19 + .../hadoop/hbase/io/hfile/CombinedBlockCache.java | 26 + .../org/apache/hadoop/hbase/io/hfile/HFile.java | 10 +- .../apache/hadoop/hbase/io/hfile/HFileBlock.java | 32 +- .../hadoop/hbase/io/hfile/HFileBlockIndex.java | 126 +- .../hadoop/hbase/io/hfile/HFileReaderImpl.java | 338 +++-- .../apache/hadoop/hbase/io/hfile/HFileScanner.java | 8 +- .../hadoop/hbase/io/hfile/LruBlockCache.java | 12 +- .../hadoop/hbase/io/hfile/MemcachedBlockCache.java | 6 + .../hadoop/hbase/io/hfile/bucket/BucketCache.java | 85 +- .../hbase/io/hfile/bucket/ByteBufferIOEngine.java | 12 +- .../hadoop/hbase/io/hfile/bucket/FileIOEngine.java | 11 +- .../hadoop/hbase/io/hfile/bucket/IOEngine.java | 10 +- .../org/apache/hadoop/hbase/ipc/CallRunner.java | 22 +- .../org/apache/hadoop/hbase/ipc/RpcServer.java | 60 +- .../hadoop/hbase/ipc/RpcServerInterface.java | 12 +- .../hadoop/hbase/quotas/DefaultOperationQuota.java | 5 + .../hadoop/hbase/quotas/NoopOperationQuota.java | 5 + .../apache/hadoop/hbase/quotas/OperationQuota.java | 6 + .../hbase/regionserver/BatchReturnableScanner.java | 33 + .../hadoop/hbase/regionserver/GetContext.java | 49 + .../apache/hadoop/hbase/regionserver/HRegion.java | 189 ++- .../hadoop/hbase/regionserver/KeyValueHeap.java | 52 +- .../hadoop/hbase/regionserver/KeyValueScanner.java | 2 +- .../hbase/regionserver/NonLazyKeyValueScanner.java | 4 + .../hadoop/hbase/regionserver/RSRpcServices.java | 99 +- .../hbase/regionserver/ReversedKeyValueHeap.java | 6 +- .../regionserver/ReversedRegionScannerImpl.java | 5 +- .../hadoop/hbase/regionserver/StoreFile.java | 12 +- .../hbase/regionserver/StoreFileScanner.java | 19 +- .../hadoop/hbase/regionserver/StoreScanner.java | 87 +- .../hadoop/hbase/util/CompoundBloomFilter.java | 2 + .../apache/hadoop/hbase/HBaseTestingUtility.java | 18 + .../apache/hadoop/hbase/TestAcidGuarantees.java | 4 + .../hbase/client/TestBlockEvictionFromClient.java | 1320 ++++++++++++++++++++ .../hbase/io/TestByteBufferOutputStream.java | 2 +- .../hadoop/hbase/io/hfile/TestCacheOnWrite.java | 30 + .../apache/hadoop/hbase/io/hfile/TestHFile.java | 2 +- .../hadoop/hbase/io/hfile/TestHFileBlockIndex.java | 4 + .../io/hfile/bucket/TestByteBufferIOEngine.java | 6 +- .../hbase/io/hfile/bucket/TestFileIOEngine.java | 6 +- .../apache/hadoop/hbase/ipc/AbstractTestIPC.java | 8 +- .../hadoop/hbase/ipc/TestRpcHandlerException.java | 4 +- .../hbase/regionserver/TestHeapMemoryManager.java | 6 + .../hbase/regionserver/TestKeyValueHeap.java | 21 +- .../regionserver/TestScannerHeartbeatMessages.java | 16 +- 60 files changed, 2721 insertions(+), 422 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java index 686aaa8..9eb811a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java @@ -38,6 +38,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; @@ -61,6 +62,7 @@ import org.apache.hadoop.hbase.ipc.RegionCoprocessorRpcChannel; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.RequestConverter; +import org.apache.hadoop.hbase.protobuf.ResponseConverter; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiRequest; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateRequest; @@ -596,7 +598,7 @@ public class HTable implements HTableInterface { try { ClientProtos.GetResponse response = getStub().get(controller, request); if (!response.hasResult()) return null; - return ProtobufUtil.toResult(response.getResult()); + return ProtobufUtil.toResult(response.getResult(), controller.cellScanner()); } catch (ServiceException se) { throw ProtobufUtil.getRemoteException(se); } @@ -712,7 +714,7 @@ public class HTable implements HTableInterface { try { ClientProtos.GetResponse response = getStub().get(controller, request); if (response == null) return null; - return ProtobufUtil.toResult(response.getResult()); + return ProtobufUtil.toResult(response.getResult(), controller.cellScanner()); } catch (ServiceException se) { throw ProtobufUtil.getRemoteException(se); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java index 8f28881..f587a96 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java @@ -161,7 +161,7 @@ public class RpcRetryingCallerWithReadReplicas { if (response == null) { return null; } - return ProtobufUtil.toResult(response.getResult()); + return ProtobufUtil.toResult(response.getResult(), controller.cellScanner()); } catch (ServiceException se) { throw ProtobufUtil.getRemoteException(se); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java index 9be370d..e59478f 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java @@ -57,7 +57,6 @@ public abstract class AbstractRpcClient implements RpcClient { protected final SocketAddress localAddr; protected UserProvider userProvider; - protected final IPCUtil ipcUtil; protected final int minIdleTimeBeforeClose; // if the connection is idle for more than this // time (in ms), it will be closed at any moment. 
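Editor's aside (illustrative only, not part of the patch): the HTable and RpcRetryingCallerWithReadReplicas hunks above all follow the same client-side pattern this change introduces -- the protobuf GetResponse carries only cell metadata, while the cell data rides in a cellblock that the RPC layer exposes through the PayloadCarryingRpcController. A minimal sketch of that decode step, using only classes that appear in these hunks; the wrapper class and method name below are assumptions for illustration:

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;

final class GetResponseDecodingSketch {
  private GetResponseDecodingSketch() {}

  /** Rebuilds a client Result from the protobuf metadata plus the cellblock on the controller. */
  static Result decode(PayloadCarryingRpcController controller,
      ClientProtos.GetResponse response) throws IOException {
    if (response == null || !response.hasResult()) {
      return null;
    }
    // The cells were shipped outside the protobuf message; the controller's one-shot
    // CellScanner walks the cellblock that came back with this call.
    return ProtobufUtil.toResult(response.getResult(), controller.cellScanner());
  }
}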
@@ -72,6 +71,7 @@ public abstract class AbstractRpcClient implements RpcClient { protected final int connectTO; protected final int readTO; protected final int writeTO; + protected final IPCUtil ipcUtil; /** * Construct an IPC client for the cluster clusterId @@ -89,10 +89,10 @@ public abstract class AbstractRpcClient implements RpcClient { HConstants.DEFAULT_HBASE_CLIENT_PAUSE); this.maxRetries = conf.getInt("hbase.ipc.client.connect.max.retries", 0); this.tcpNoDelay = conf.getBoolean("hbase.ipc.client.tcpnodelay", true); - this.ipcUtil = new IPCUtil(conf); this.minIdleTimeBeforeClose = conf.getInt(IDLE_TIME, 120000); // 2 minutes this.conf = conf; + this.ipcUtil = IPCUtil.createInstance(conf); this.codec = getCodec(); this.compressor = getCompressor(conf); this.fallbackAllowed = conf.getBoolean(IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY, diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java index cfc8b1b..be8a375 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java @@ -48,6 +48,7 @@ import javax.security.sasl.SaslException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ipc.IPCUtil; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.exceptions.ConnectionClosingException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java index 2e4d0a6..91e51fa 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java @@ -316,7 +316,7 @@ public class AsyncRpcClient extends AbstractRpcClient { * @throws java.io.IOException on error on creation cell scanner */ public CellScanner createCellScanner(byte[] cellBlock) throws IOException { - return ipcUtil.createCellScanner(this.codec, this.compressor, cellBlock); + return this.ipcUtil.createCellScanner(this.codec, this.compressor, cellBlock); } /** @@ -327,7 +327,7 @@ public class AsyncRpcClient extends AbstractRpcClient { * @throws java.io.IOException if block creation fails */ public ByteBuffer buildCellBlock(CellScanner cells) throws IOException { - return ipcUtil.buildCellBlock(this.codec, this.compressor, cells); + return this.ipcUtil.buildCellBlock(this.codec, this.compressor, cells); } /** diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java index 056ecbc..50a05cc 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; +import java.util.ArrayList; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; @@ -30,12 +31,12 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.codec.Codec; import org.apache.hadoop.hbase.io.BoundedByteBufferPool; import org.apache.hadoop.hbase.io.ByteBufferOutputStream; -import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; import org.apache.hadoop.io.compress.CodecPool; @@ -52,7 +53,7 @@ import com.google.protobuf.Message; * Utility to help ipc'ing. */ @InterfaceAudience.Private -public class IPCUtil { +public final class IPCUtil { // LOG is being used in TestIPCUtil public static final Log LOG = LogFactory.getLog(IPCUtil.class); /** @@ -61,17 +62,21 @@ public class IPCUtil { private final int cellBlockDecompressionMultiplier; private final int cellBlockBuildingInitialBufferSize; private final Configuration conf; + private static IPCUtil INSTANCE = null; - public IPCUtil(final Configuration conf) { - super(); + private IPCUtil(Configuration conf) { this.conf = conf; - this.cellBlockDecompressionMultiplier = - conf.getInt("hbase.ipc.cellblock.decompression.buffersize.multiplier", 3); + this.cellBlockDecompressionMultiplier = conf.getInt( + "hbase.ipc.cellblock.decompression.buffersize.multiplier", 3); + // Guess that 16k is a good size for rpc buffer. Could go bigger. See the + // TODO below in #buildCellBlock. + this.cellBlockBuildingInitialBufferSize = ClassSize.align(conf.getInt( + "hbase.ipc.cellblock.building.initial.buffersize", 16 * 1024)); + }; - // Guess that 16k is a good size for rpc buffer. Could go bigger. See the TODO below in - // #buildCellBlock. - this.cellBlockBuildingInitialBufferSize = - ClassSize.align(conf.getInt("hbase.ipc.cellblock.building.initial.buffersize", 16 * 1024)); + public static IPCUtil createInstance(Configuration conf) { + INSTANCE = new IPCUtil(conf); + return INSTANCE; } /** @@ -97,6 +102,10 @@ public class IPCUtil { return buildCellBlock(codec, compressor, cellScanner, null); } + public static IPCUtil getInstance() { + return INSTANCE; + } + /** * Puts CellScanner Cells into a cell block using passed in codec and/or * compressor. @@ -118,24 +127,10 @@ public class IPCUtil { throws IOException { if (cellScanner == null) return null; if (codec == null) throw new CellScannerButNoCodecException(); - int bufferSize = this.cellBlockBuildingInitialBufferSize; - ByteBufferOutputStream baos = null; - if (pool != null) { - ByteBuffer bb = pool.getBuffer(); - bufferSize = bb.capacity(); - baos = new ByteBufferOutputStream(bb); - } else { - // Then we need to make our own to return. - if (cellScanner instanceof HeapSize) { - long longSize = ((HeapSize)cellScanner).heapSize(); - // Just make sure we don't have a size bigger than an int. - if (longSize > Integer.MAX_VALUE) { - throw new IOException("Size " + longSize + " > " + Integer.MAX_VALUE); - } - bufferSize = ClassSize.align((int)longSize); - } - baos = new ByteBufferOutputStream(bufferSize); - } + ByteBufferOutputStream baos = createCellBlockStream(-1, pool); // We don't know the actual + // cell block size before + // hand. So just pass -1. 
+ int bufferSize = baos.getByteBuffer().capacity(); OutputStream os = baos; Compressor poolCompressor = null; try { @@ -164,7 +159,55 @@ public class IPCUtil { "; up hbase.ipc.cellblock.building.initial.buffersize?"); } } - return baos.getByteBuffer(); + return baos.flipAndReturnBuffer(); + } + + @SuppressWarnings("resource") + public ByteBuffer buildCellBlock(Codec codec, CompressionCodec compressor, + ArrayList results, int bufferSize, BoundedByteBufferPool pool) + throws IOException { + if (results == null || results.isEmpty()) return null; + ByteBufferOutputStream baos = createCellBlockStream(bufferSize, pool); + Compressor poolCompressor = null; + OutputStream os = baos; + try { + if (compressor != null) { + if (compressor instanceof Configurable) + ((Configurable) compressor).setConf(conf); + poolCompressor = CodecPool.getCompressor(compressor); + os = compressor.createOutputStream(os, poolCompressor); + } + Codec.Encoder encoder = codec.getEncoder(os); + for (int i = 0; i < results.size(); i++) { + encoder.write(results.get(i)); + } + encoder.flush(); + } finally { + os.close(); + if (poolCompressor != null) + CodecPool.returnCompressor(poolCompressor); + } + return baos.flipAndReturnBuffer(); + } + + private ByteBufferOutputStream createCellBlockStream(int bufferSize, BoundedByteBufferPool pool) + throws IOException { + if (bufferSize < 0) { + bufferSize = cellBlockBuildingInitialBufferSize; + } + ByteBufferOutputStream baos = null; + if (pool != null) { + // TODO make use of totalCellSize. We know the size needed already. We have + // to add API in BoundedByteBufferPool also. + ByteBuffer bb = pool.getBuffer(); + // Dead code + bufferSize = bb.capacity(); + baos = new ByteBufferOutputStream(bb); + } else { + // Then we need to make our own to return. + baos = new ByteBufferOutputStream(bufferSize); + } + return baos; } /** @@ -208,7 +251,7 @@ public class IPCUtil { this.cellBlockDecompressionMultiplier); IOUtils.copy(cis, bbos); bbos.close(); - ByteBuffer bb = bbos.getByteBuffer(); + ByteBuffer bb = bbos.flipAndReturnBuffer(); is = new ByteArrayInputStream(bb.array(), 0, bb.limit()); } finally { if (is != null) is.close(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java index 09f4323..bb81e32 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java @@ -17,14 +17,21 @@ */ package org.apache.hadoop.hbase.ipc; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScannable; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.CellUtil; +import org.apache.hadoop.hbase.io.BoundedByteBufferPool; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.codec.Codec; +import org.apache.hadoop.io.compress.CompressionCodec; /** * Optionally carries Cells across the proxy/service interface down into ipc. On its @@ -52,6 +59,30 @@ public class PayloadCarryingRpcController */ private CellScanner cellScanner; + /** + * The payload can be set either as a CellScanner or as a Buffer of Cell data ready to be + * serialized over wire. 
When payload is set as block buffer, we must set meta data about this + * block ie. number of cells in this block. + */ + private ByteBuffer cellBlock; + private int cellsCountInBlock; + + private BoundedByteBufferPool reservoir = null; + private Codec codec = null; + private CompressionCodec compressionCodec = null; + + public void setReservoir(final BoundedByteBufferPool pool) { + this.reservoir = pool; + } + + public void setCodec(Codec codec) { + this.codec = codec; + } + + public void setCompressionCodec(CompressionCodec codec) { + this.compressionCodec = codec; + } + public PayloadCarryingRpcController() { this((CellScanner)null); } @@ -65,6 +96,31 @@ public class PayloadCarryingRpcController } /** + * Builds a cellblock from the passed cells + * @param cells + * @param totalCellsSize + * @throws IOException + */ + public void setCellBlock(final ArrayList cells, int totalCellsSize) throws IOException { + this.cellBlock = IPCUtil.getInstance().buildCellBlock(this.codec, this.compressionCodec, + cells, totalCellsSize, this.reservoir); + } + + /** + * Builds a cellblock from the cellScanner + * @param cellScanner the cell scanner to create the cellblock + * @throws IOException + */ + public void setCellBlock(final CellScanner cellScanner) throws IOException { + cellBlock = IPCUtil.getInstance().buildCellBlock(this.codec, this.compressionCodec, cellScanner, + reservoir); + } + + public ByteBuffer getCellBlock() { + return this.cellBlock; + } + + /** * @return One-shot cell scanner (you cannot back it up and restart) */ public CellScanner cellScanner() { @@ -75,6 +131,12 @@ public class PayloadCarryingRpcController this.cellScanner = cellScanner; } + public void clearPayload() { + this.cellScanner = null; + this.cellBlock = null; + this.cellsCountInBlock = 0; + } + /** * @param priority Priority for this request; should fall roughly in the range * {@link HConstants#NORMAL_QOS} to {@link HConstants#HIGH_QOS} @@ -102,5 +164,15 @@ public class PayloadCarryingRpcController super.reset(); priority = 0; cellScanner = null; + this.cellBlock = null; + this.cellsCountInBlock = 0; + } + + public void setCellsCountInBlock(int numberOfCells) { + this.cellsCountInBlock = numberOfCells; + } + + public int getCellsCountInBlock() { + return this.cellsCountInBlock; } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java index 9a5fc14..9921f12 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ipc.IPCUtil; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.codec.Codec; diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java index 163be70..39bd039 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java @@ -31,7 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScanner; import 
org.apache.hadoop.hbase.CellUtil; -import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.ipc.IPCUtil; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.codec.Codec; import org.apache.hadoop.hbase.codec.KeyValueCodec; @@ -53,33 +53,32 @@ public class TestIPCUtil { private static final Log LOG = LogFactory.getLog(TestIPCUtil.class); - IPCUtil util; @Before public void before() { - this.util = new IPCUtil(new Configuration()); + IPCUtil.createInstance(new Configuration()); } - + @Test public void testBuildCellBlock() throws IOException { - doBuildCellBlockUndoCellBlock(this.util, new KeyValueCodec(), null); - doBuildCellBlockUndoCellBlock(this.util, new KeyValueCodec(), new DefaultCodec()); - doBuildCellBlockUndoCellBlock(this.util, new KeyValueCodec(), new GzipCodec()); + doBuildCellBlockUndoCellBlock(new KeyValueCodec(), null); + doBuildCellBlockUndoCellBlock(new KeyValueCodec(), new DefaultCodec()); + doBuildCellBlockUndoCellBlock(new KeyValueCodec(), new GzipCodec()); } - static void doBuildCellBlockUndoCellBlock(final IPCUtil util, - final Codec codec, final CompressionCodec compressor) + static void doBuildCellBlockUndoCellBlock(final Codec codec, final CompressionCodec compressor) throws IOException { - doBuildCellBlockUndoCellBlock(util, codec, compressor, 10, 1, false); + doBuildCellBlockUndoCellBlock(codec, compressor, 10, 1, false); } - static void doBuildCellBlockUndoCellBlock(final IPCUtil util, final Codec codec, + static void doBuildCellBlockUndoCellBlock(final Codec codec, final CompressionCodec compressor, final int count, final int size, final boolean sized) throws IOException { Cell [] cells = getCells(count, size); CellScanner cellScanner = sized? getSizedCellScanner(cells): CellUtil.createCellScanner(Arrays.asList(cells).iterator()); - ByteBuffer bb = util.buildCellBlock(codec, compressor, cellScanner); - cellScanner = util.createCellScanner(codec, compressor, bb.array(), 0, bb.limit()); + ByteBuffer bb = IPCUtil.getInstance().buildCellBlock(codec, compressor, cellScanner); + cellScanner = IPCUtil.getInstance().createCellScanner(codec, compressor, bb.array(), 0, + bb.limit()); int i = 0; while (cellScanner.advance()) { i++; @@ -143,14 +142,14 @@ public class TestIPCUtil { System.exit(errCode); } - private static void timerTests(final IPCUtil util, final int count, final int size, + private static void timerTests(final int count, final int size, final Codec codec, final CompressionCodec compressor) throws IOException { final int cycles = 1000; StopWatch timer = new StopWatch(); timer.start(); for (int i = 0; i < cycles; i++) { - timerTest(util, timer, count, size, codec, compressor, false); + timerTest(timer, count, size, codec, compressor, false); } timer.stop(); LOG.info("Codec=" + codec + ", compression=" + compressor + ", sized=" + false + @@ -158,17 +157,17 @@ public class TestIPCUtil { timer.reset(); timer.start(); for (int i = 0; i < cycles; i++) { - timerTest(util, timer, count, size, codec, compressor, true); + timerTest(timer, count, size, codec, compressor, true); } timer.stop(); LOG.info("Codec=" + codec + ", compression=" + compressor + ", sized=" + true + ", count=" + count + ", size=" + size + ", + took=" + timer.getTime() + "ms"); } - private static void timerTest(final IPCUtil util, final StopWatch timer, final int count, + private static void timerTest(final StopWatch timer, final int count, final int size, final Codec codec, final CompressionCodec compressor, final boolean sized) throws IOException { - 
doBuildCellBlockUndoCellBlock(util, codec, compressor, count, size, sized); + doBuildCellBlockUndoCellBlock(codec, compressor, count, size, sized); } /** @@ -188,10 +187,9 @@ public class TestIPCUtil { usage(1); } } - IPCUtil util = new IPCUtil(HBaseConfiguration.create()); ((Log4JLogger)IPCUtil.LOG).getLogger().setLevel(Level.ALL); - timerTests(util, count, size, new KeyValueCodec(), null); - timerTests(util, count, size, new KeyValueCodec(), new DefaultCodec()); - timerTests(util, count, size, new KeyValueCodec(), new GzipCodec()); + timerTests(count, size, new KeyValueCodec(), null); + timerTests(count, size, new KeyValueCodec(), new DefaultCodec()); + timerTests(count, size, new KeyValueCodec(), new GzipCodec()); } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/SharedMemoryBackedCell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/SharedMemoryBackedCell.java new file mode 100644 index 0000000..f50fc00 --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/SharedMemoryBackedCell.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * Any Cell implementing this interface has its data in shared, common cache memory. When we + * read data from such a cache, we make a cell referring to the same memory and avoid any copy. + */ +@InterfaceAudience.Private +public interface SharedMemoryBackedCell extends Cell { + + /** + * Copy the cell's data from shared cache memory to a dedicated memory area + * and return it as a new Cell. This is useful when we want to consume the Cell + * even before the block referring to these cells is evicted.
+ * + * @return A copy of the Cell + */ + Cell copy(); +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java index af12113..5e36848 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java @@ -25,6 +25,8 @@ import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.WritableByteChannel; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.util.Bytes; @@ -36,6 +38,7 @@ import org.apache.hadoop.hbase.util.Bytes; @InterfaceStability.Evolving public class ByteBufferOutputStream extends OutputStream { + private static final Log LOG = LogFactory.getLog(ByteBufferOutputStream.class); protected ByteBuffer buf; public ByteBufferOutputStream(int capacity) { @@ -50,11 +53,12 @@ public class ByteBufferOutputStream extends OutputStream { * @param bb ByteBuffer to use. If too small, will be discarded and a new one allocated in its * place; i.e. the passed in BB may NOT BE RETURNED!! Minimally it will be altered. SIDE EFFECT!! * If you want to get the newly allocated ByteBuffer, you'll need to pick it up when - * done with this instance by calling {@link #getByteBuffer()}. All this encapsulation violation + * done with this instance by calling {@link #flipAndReturnBuffer()}. + * All this encapsulation violation * is so we can recycle buffers rather than allocate each time; it can get expensive especially * if the buffers are big doing allocations each time or having them undergo resizing because * initial allocation was small. - * @see #getByteBuffer() + * @see #flipAndReturnBuffer() */ public ByteBufferOutputStream(final ByteBuffer bb) { this.buf = bb; @@ -73,11 +77,19 @@ public class ByteBufferOutputStream extends OutputStream { * This flips the underlying BB so be sure to use it _last_! * @return ByteBuffer */ - public ByteBuffer getByteBuffer() { + public ByteBuffer flipAndReturnBuffer() { buf.flip(); return buf; } + /** + * This flips the underlying BB so be sure to use it _last_! + * @return ByteBuffer + */ + public ByteBuffer getByteBuffer() { + return buf; + } + private void checkSizeAndGrow(int extra) { if ( (buf.position() + extra) > buf.limit()) { // size calculation is complex, because we could overflow negative, diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java index 5edd47d..a1f105a 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java @@ -56,6 +56,7 @@ public class HFileContext implements HeapSize, Cloneable { /** Encryption algorithm and key used */ private Encryption.Context cryptoContext = Encryption.Context.NONE; private long fileCreateTime; + private String hfileName; //Empty constructor. 
Go with setters public HFileContext() { @@ -77,12 +78,13 @@ public class HFileContext implements HeapSize, Cloneable { this.encoding = context.encoding; this.cryptoContext = context.cryptoContext; this.fileCreateTime = context.fileCreateTime; + this.hfileName = context.hfileName; } public HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags, Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType, int bytesPerChecksum, int blockSize, DataBlockEncoding encoding, - Encryption.Context cryptoContext, long fileCreateTime) { + Encryption.Context cryptoContext, long fileCreateTime, String hfileName) { this.usesHBaseChecksum = useHBaseChecksum; this.includesMvcc = includesMvcc; this.includesTags = includesTags; @@ -96,6 +98,7 @@ public class HFileContext implements HeapSize, Cloneable { } this.cryptoContext = cryptoContext; this.fileCreateTime = fileCreateTime; + this.hfileName = hfileName; } /** @@ -187,6 +190,14 @@ public class HFileContext implements HeapSize, Cloneable { this.cryptoContext = cryptoContext; } + public String getHFileName() { + return this.hfileName; + } + + public void setHFileName(String name) { + this.hfileName = name; + } + /** * HeapSize implementation * NOTE : The heapsize should be altered as and when new state variable are added @@ -196,11 +207,14 @@ public class HFileContext implements HeapSize, Cloneable { public long heapSize() { long size = ClassSize.align(ClassSize.OBJECT + // Algorithm reference, encodingon, checksumtype, Encryption.Context reference - 4 * ClassSize.REFERENCE + + 5 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + // usesHBaseChecksum, includesMvcc, includesTags and compressTags 4 * Bytes.SIZEOF_BOOLEAN + Bytes.SIZEOF_LONG); + if (this.hfileName != null) { + size += ClassSize.STRING + this.hfileName.length(); + } return size; } @@ -227,6 +241,10 @@ public class HFileContext implements HeapSize, Cloneable { sb.append(" compressAlgo="); sb.append(compressAlgo); sb.append(" compressTags="); sb.append(compressTags); sb.append(" cryptoContext=[ "); sb.append(cryptoContext); sb.append(" ]"); + if (hfileName != null) { + sb.append(" name="); + sb.append(hfileName); + } sb.append(" ]"); return sb.toString(); } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java index a903974..19a323a 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java @@ -53,6 +53,8 @@ public class HFileContextBuilder { private Encryption.Context cryptoContext = Encryption.Context.NONE; private long fileCreateTime = 0; + private String hfileName = null; + public HFileContextBuilder withHBaseCheckSum(boolean useHBaseCheckSum) { this.usesHBaseChecksum = useHBaseCheckSum; return this; @@ -108,9 +110,14 @@ public class HFileContextBuilder { return this; } + public HFileContextBuilder withPathName(String name) { + this.hfileName = name; + return this; + } + public HFileContext build() { return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression, compressTags, checksumType, bytesPerChecksum, blocksize, encoding, cryptoContext, - fileCreateTime); + fileCreateTime, hfileName); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java index 
a95da7b..6b969f9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java @@ -288,6 +288,17 @@ public class HalfStoreFileReader extends StoreFile.Reader { public Cell getNextIndexedKey() { return null; } + + @Override + public void returnCurrentBatch() { + this.delegate.returnCurrentBatch(); + } + + @Override + public void close() { + this.delegate.close(); + } + }; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java index 57c4be9..0baa6b0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java @@ -116,4 +116,23 @@ public interface BlockCache extends Iterable<CachedBlock> { * @return The list of sub blockcaches that make up this one; returns null if no sub caches. */ BlockCache [] getBlockCaches(); + + /** + * Called when the caller has finished using the block, so the cache can release it. + * @param cacheKey + * @param block + * @return true if the block was successfully returned to the cache, false otherwise + */ + boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block); + + /** + * An enum that indicates whether the block retrieved from the cache is backed by a shared + * memory cache or by non-shared memory. When we say shared memory, we mean we try to serve + * reads from that cache without doing any copy, so all the cells belonging to these blocks + * cannot be evicted till the results are consumed. For the non-shared memory case, we don't + * have any such constraint. + */ + public static enum MemoryType { + SHARED_MEM, NON_SHARED_MEM; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index 7725cf9..affda8b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -23,8 +23,11 @@ import java.util.Iterator; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory; +import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; +import com.google.common.annotations.VisibleForTesting; + /** * CombinedBlockCache is an abstraction layer that combines @@ -219,4 +222,27 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { public void setMaxSize(long size) { this.lruCache.setMaxSize(size); } + + @Override + public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) { + assert block.getCacheType() != CacheType.NOT_CACHED; + if (block.getCacheType() == CacheType.L1_CACHED) { + // At the time when this block was served, it was in L1 cache. Even if it is transferred to L2 + // cache by this time, no need to contact L2 cache.
+ return this.lruCache.returnBlock(cacheKey, block); + } else if (block.getCacheType() == CacheType.L2_CACHED) { + return this.l2Cache.returnBlock(cacheKey, block); + } + return true; + } + + @VisibleForTesting + public int getRefCount(BlockCacheKey cacheKey) { + // Check this + if (this.l2Cache != null) { + return ((BucketCache) this.l2Cache).getRefCount(cacheKey); + } else { + return 0; + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index 6c8260d..b2975b8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -26,7 +26,6 @@ import java.io.DataOutputStream; import java.io.IOException; import java.io.SequenceInputStream; import java.net.InetSocketAddress; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -40,7 +39,6 @@ import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.hbase.util.ByteStringer; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -55,6 +53,7 @@ import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.compress.Compression; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.protobuf.ProtobufMagic; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos; @@ -373,6 +372,11 @@ public class HFile { final boolean updateCacheMetrics, BlockType expectedBlockType, DataBlockEncoding expectedDataBlockEncoding) throws IOException; + /** + * Return the given block back to the cache, if it was obtained from cache. + * @param block Block to be returned. + */ + void returnBlock(HFileBlock block); } /** An interface used by clients to open and iterate an {@link HFile}. 
*/ @@ -388,7 +392,7 @@ public class HFile { HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction); - ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException; + HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException; Map loadFileInfo() throws IOException; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java index a64bb94..3b69dc5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext; import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext; +import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ChecksumType; @@ -192,6 +193,9 @@ public class HFileBlock implements Cacheable { */ private int nextBlockOnDiskSizeWithHeader = -1; + private CacheType cacheType = CacheType.NOT_CACHED; + private MemoryType memType = MemoryType.NON_SHARED_MEM; + /** * Creates a new {@link HFile} block from the given fields. This constructor * is mostly used when the block data has already been read and uncompressed, @@ -416,8 +420,10 @@ public class HFileBlock implements Cacheable { sanityCheckAssertion(buf.getLong(), prevBlockOffset, "prevBlocKOffset"); if (this.fileContext.isUseHBaseChecksum()) { - sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), "checksumType"); - sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), "bytesPerChecksum"); + sanityCheckAssertion(buf.get(), this.fileContext.getChecksumType().getCode(), + "checksumType"); + sanityCheckAssertion(buf.getInt(), this.fileContext.getBytesPerChecksum(), + "bytesPerChecksum"); sanityCheckAssertion(buf.getInt(), onDiskDataSizeWithHeader, "onDiskDataSizeWithHeader"); } @@ -637,7 +643,7 @@ public class HFileBlock implements Cacheable { long size = ClassSize.align( ClassSize.OBJECT + // Block type, byte buffer and meta references - 3 * ClassSize.REFERENCE + + 5 * ClassSize.REFERENCE + // On-disk size, uncompressed size, and next block's on-disk size // bytePerChecksum and onDiskDataSize 4 * Bytes.SIZEOF_INT + @@ -1871,6 +1877,26 @@ public class HFileBlock implements Cacheable { return this.fileContext; } + public void setCacheType(CacheType cacheType) { + this.cacheType = cacheType; + } + + public void setMemoryType(MemoryType memType) { + this.memType = memType; + } + + public MemoryType getMemoryType() { + return this.memType; + } + + public CacheType getCacheType() { + return this.cacheType; + } + + public static enum CacheType { + L1_CACHED, L2_CACHED, NOT_CACHED; + } + /** * Convert the contents of the block header into a human readable string. * This is mostly helpful for debugging. 
This assumes that the block diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java index e6e1fff..fbc14c0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java @@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicReference; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.hbase.Cell; @@ -40,6 +39,7 @@ import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader; @@ -235,76 +235,85 @@ public class HFileBlockIndex { int lookupLevel = 1; // How many levels deep we are in our lookup. int index = -1; - HFileBlock block; + HFileBlock block = null; + boolean dataBlock = false; while (true) { - - if (currentBlock != null && currentBlock.getOffset() == currentOffset) - { - // Avoid reading the same block again, even with caching turned off. - // This is crucial for compaction-type workload which might have - // caching turned off. This is like a one-block cache inside the - // scanner. - block = currentBlock; - } else { - // Call HFile's caching block reader API. We always cache index - // blocks, otherwise we might get terrible performance. - boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel); - BlockType expectedBlockType; - if (lookupLevel < searchTreeLevel - 1) { - expectedBlockType = BlockType.INTERMEDIATE_INDEX; - } else if (lookupLevel == searchTreeLevel - 1) { - expectedBlockType = BlockType.LEAF_INDEX; + try { + if (currentBlock != null && currentBlock.getOffset() == currentOffset) { + // Avoid reading the same block again, even with caching turned off. + // This is crucial for compaction-type workload which might have + // caching turned off. This is like a one-block cache inside the + // scanner. + block = currentBlock; } else { - // this also accounts for ENCODED_DATA - expectedBlockType = BlockType.DATA; + // Call HFile's caching block reader API. We always cache index + // blocks, otherwise we might get terrible performance. 
+ boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel); + BlockType expectedBlockType; + if (lookupLevel < searchTreeLevel - 1) { + expectedBlockType = BlockType.INTERMEDIATE_INDEX; + } else if (lookupLevel == searchTreeLevel - 1) { + expectedBlockType = BlockType.LEAF_INDEX; + } else { + // this also accounts for ENCODED_DATA + expectedBlockType = BlockType.DATA; + } + block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache, + pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding); } - block = cachingBlockReader.readBlock(currentOffset, - currentOnDiskSize, shouldCache, pread, isCompaction, true, - expectedBlockType, expectedDataBlockEncoding); - } - if (block == null) { - throw new IOException("Failed to read block at offset " + - currentOffset + ", onDiskSize=" + currentOnDiskSize); - } + if (block == null) { + throw new IOException("Failed to read block at offset " + currentOffset + + ", onDiskSize=" + currentOnDiskSize); + } - // Found a data block, break the loop and check our level in the tree. - if (block.getBlockType().isData()) { - break; - } + // Found a data block, break the loop and check our level in the tree. + if (block.getBlockType().isData()) { + dataBlock = true; + break; + } - // Not a data block. This must be a leaf-level or intermediate-level - // index block. We don't allow going deeper than searchTreeLevel. - if (++lookupLevel > searchTreeLevel) { - throw new IOException("Search Tree Level overflow: lookupLevel="+ - lookupLevel + ", searchTreeLevel=" + searchTreeLevel); - } + // Not a data block. This must be a leaf-level or intermediate-level + // index block. We don't allow going deeper than searchTreeLevel. + if (++lookupLevel > searchTreeLevel) { + throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel + + ", searchTreeLevel=" + searchTreeLevel); + } - // Locate the entry corresponding to the given key in the non-root - // (leaf or intermediate-level) index block. - ByteBuffer buffer = block.getBufferWithoutHeader(); - index = locateNonRootIndexEntry(buffer, key, comparator); - if (index == -1) { - // This has to be changed - // For now change this to key value - KeyValue kv = KeyValueUtil.ensureKeyValue(key); - throw new IOException("The key " - + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength()) - + " is before the" + " first key of the non-root index block " - + block); - } + // Locate the entry corresponding to the given key in the non-root + // (leaf or intermediate-level) index block. 
+ ByteBuffer buffer = block.getBufferWithoutHeader(); + index = locateNonRootIndexEntry(buffer, key, comparator); + if (index == -1) { + // This has to be changed + // For now change this to key value + KeyValue kv = KeyValueUtil.ensureKeyValue(key); + throw new IOException("The key " + + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength()) + + " is before the" + " first key of the non-root index block " + block); + } - currentOffset = buffer.getLong(); - currentOnDiskSize = buffer.getInt(); + currentOffset = buffer.getLong(); + currentOnDiskSize = buffer.getInt(); - // Only update next indexed key if there is a next indexed key in the current level - byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1); - if (tmpNextIndexedKey != null) { - nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey); + // Only update next indexed key if there is a next indexed key in the + // current level + byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1); + if (tmpNextIndexedKey != null) { + nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey); + } + } finally { + if (!dataBlock) { + // Return the block immediately if it is not the + // data block + cachingBlockReader.returnBlock(block); + } } } if (lookupLevel != searchTreeLevel) { + assert dataBlock == true; + cachingBlockReader.returnBlock(block); throw new IOException("Reached a data block at level " + lookupLevel + " but the number of levels is " + searchTreeLevel); } @@ -349,6 +358,7 @@ public class HFileBlockIndex { int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset + SECONDARY_INDEX_ENTRY_OVERHEAD; targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen); + cachingBlockReader.returnBlock(midLeafBlock); } else { // The middle of the root-level index. targetMidKey = blockKeys[rootCount / 2]; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 4d1881d..980170a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -39,7 +39,7 @@ import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.NoTagsKeyValue; -import org.apache.hadoop.hbase.KeyValue.KVComparator; +import org.apache.hadoop.hbase.SharedMemoryBackedCell; import org.apache.hadoop.hbase.fs.HFileSystem; import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; import org.apache.hadoop.hbase.io.compress.Compression; @@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IdLock; import org.apache.hadoop.io.WritableUtils; import org.apache.htrace.Trace; +import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType; import org.apache.htrace.TraceScope; import com.google.common.annotations.VisibleForTesting; @@ -256,6 +257,14 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } HFileBlock block = readBlock(offset, onDiskSize, true, false, false, false, null, null); + // No need to update cur block with fetcher. Ideally here the + // readBlock wont find the + // block in cache. We call this readBlock so that block data is + // read from FS and + // cached in BC. Even if we have to returnBlock to make sure it is + // being returned, + // just return it immediately. 
+ returnBlock(block); prevBlock = block; offset += block.getOnDiskSizeWithHeader(); } @@ -335,6 +344,15 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { return fileSize; } + @Override + public void returnBlock(HFileBlock block) { + BlockCache blockCache = this.cacheConf.getBlockCache(); + if (blockCache != null && block != null && block.getCacheType() != CacheType.NOT_CACHED) { + BlockCacheKey cacheKey = new BlockCacheKey(this.getFileContext().getHFileName(), + block.getOffset()); + blockCache.returnBlock(cacheKey, block); + } + } /** * @return the first key in the file. May be null if file has no entries. Note * that this is not the first row key, but rather the byte form of the @@ -446,8 +464,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { protected final HFile.Reader reader; private int currTagsLen; - protected HFileBlock block; - /** * The next indexed key is to keep track of the indexed key of the next data block. * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the @@ -456,6 +472,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet. */ protected Cell nextIndexedKey; + // Current block being used + protected HFileBlock curBlock; + // Previous blocks that were used in the course of the read + protected ArrayList prevBlocks = null; public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks, final boolean pread, final boolean isCompaction) { @@ -463,8 +483,44 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { this.cacheBlocks = cacheBlocks; this.pread = pread; this.isCompaction = isCompaction; + prevBlocks = new ArrayList(); + } + + void addToCurrentBlock(HFileBlock block) { + if (block != null && this.curBlock != null && + block.getOffset() == this.curBlock.getOffset()) { + return; + } + if (this.curBlock != null) { + prevBlocks.add(this.curBlock); + } + this.curBlock = block; + } + + void reset() { + if (this.curBlock != null) { + this.prevBlocks.add(this.curBlock); + } + this.curBlock = null; + } + + private void returnBlockToCache(HFileBlock block) { + if (LOG.isTraceEnabled()) { + LOG.trace("Returning the block : " + block); + } + this.reader.returnBlock(block); } + void returnBlocks(boolean returnAll) { + for (int i = 0; i < this.prevBlocks.size(); i++) { + returnBlockToCache(this.prevBlocks.get(i)); + } + this.prevBlocks.clear(); + if (returnAll && this.curBlock != null) { + returnBlockToCache(this.curBlock); + this.curBlock = null; + } + } @Override public boolean isSeeked(){ return blockBuffer != null; @@ -493,6 +549,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { return kvBufSize; } + @Override + public void close() { + this.returnBlocks(true); + } + protected int getNextCellStartPosition() { int nextKvPos = blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen + currMemstoreTSLen; @@ -528,9 +589,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { private final void checkTagsLen() { if (checkLen(this.currTagsLen)) { - throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen + - ". Block offset: " + block.getOffset() + ", block length: " + this.blockBuffer.limit() + - ", position: " + this.blockBuffer.position() + " (without header)."); + throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen + + ". 
Block offset: " + curBlock.getOffset() + ", block length: " + + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position() + + " (without header)."); } } @@ -607,7 +669,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { || vlen > blockBuffer.limit()) { throw new IllegalStateException("Invalid klen " + klen + " or vlen " + vlen + ". Block offset: " - + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position() + " (without header)."); } ByteBufferUtils.skip(blockBuffer, klen + vlen); @@ -616,7 +678,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { tlen = ((blockBuffer.get() & 0xff) << 8) ^ (blockBuffer.get() & 0xff); if (tlen < 0 || tlen > blockBuffer.limit()) { throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: " - + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: " + blockBuffer.position() + " (without header)."); } ByteBufferUtils.skip(blockBuffer, tlen); @@ -643,8 +705,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { throw new IllegalStateException("blockSeek with seekBefore " + "at the first key of the block: key=" + CellUtil.getCellKeyAsString(key) - + ", blockOffset=" + block.getOffset() + ", onDiskSize=" - + block.getOnDiskSizeWithHeader()); + + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize=" + + curBlock.getOnDiskSizeWithHeader()); } blockBuffer.position(blockBuffer.position() - lastKeyValueSize); readKeyValueLen(); @@ -708,15 +770,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { // The comparison with no_next_index_key has to be checked if (this.nextIndexedKey != null && (this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY || reader - .getComparator().compareKeyIgnoresMvcc(key, nextIndexedKey) < 0)) { + .getComparator().compare(key, nextIndexedKey) < 0)) { // The reader shall continue to scan the current data block instead // of querying the // block index as long as it knows the target key is strictly // smaller than // the next indexed key or the current data block is the last data // block. - return loadBlockAndSeekToKey(this.block, nextIndexedKey, false, key, false); + return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key, + false); } + } } // Don't rewind on a reseek operation, because reseek implies that we are @@ -740,10 +804,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { */ public int seekTo(Cell key, boolean rewind) throws IOException { HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader(); - BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, block, - cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding()); + BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, + curBlock, cacheBlocks, pread, isCompaction, + getEffectiveDataBlockEncoding()); if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) { - // This happens if the key e.g. falls before the beginning of the file. + // This happens if the key e.g. falls before the beginning of the + // file. 
return -1; } return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(), @@ -752,17 +818,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { @Override public boolean seekBefore(Cell key) throws IOException { - HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, block, - cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction)); + HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, + curBlock, cacheBlocks, pread, isCompaction, + reader.getEffectiveEncodingInCache(isCompaction)); if (seekToBlock == null) { return false; } ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock); - if (reader.getComparator() - .compareKeyIgnoresMvcc( - new KeyValue.KeyOnlyKeyValue(firstKey.array(), firstKey.arrayOffset(), - firstKey.limit()), key) >= 0) { + if (reader.getComparator().compareKeyIgnoresMvcc( + new KeyValue.KeyOnlyKeyValue(firstKey.array(), firstKey.arrayOffset(), firstKey.limit()), + key) >= 0) { long previousBlockOffset = seekToBlock.getPrevBlockOffset(); // The key we are interested in if (previousBlockOffset == -1) { @@ -773,6 +839,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { // It is important that we compute and pass onDiskSize to the block // reader so that it does not have to read the header separately to // figure out the size. + reader.returnBlock(seekToBlock); seekToBlock = reader.readBlock(previousBlockOffset, seekToBlock.getOffset() - previousBlockOffset, cacheBlocks, pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); @@ -793,17 +860,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { */ protected HFileBlock readNextDataBlock() throws IOException { long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); - if (block == null) + if (curBlock == null) return null; - HFileBlock curBlock = block; + HFileBlock curBlock = this.curBlock; do { if (curBlock.getOffset() >= lastDataBlockOffset) return null; if (curBlock.getOffset() < 0) { - throw new IOException("Invalid block file offset: " + block); + throw new IOException("Invalid block file offset: " + curBlock); } // We are reading the next block without block type validation, because @@ -812,6 +879,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { + curBlock.getOnDiskSizeWithHeader(), curBlock.getNextBlockOnDiskSizeWithHeader(), cacheBlocks, pread, isCompaction, true, null, getEffectiveDataBlockEncoding()); + if (curBlock != null && !curBlock.getBlockType().isData()) { + // Whatever block we read we will be returning it unless + // it is a datablock. 
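// [Editorial note, not part of the patch: a non-data block read while skipping forward here
// never becomes curBlock and so never enters the prevBlocks accounting; returning it
// immediately releases the cache reference taken by readBlock(). returnBlock() is a no-op
// for blocks that were not served from the block cache (CacheType.NOT_CACHED).]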
Just in case the blocks are non data blocks + reader.returnBlock(curBlock); + } } while (!curBlock.getBlockType().isData()); return curBlock; @@ -823,23 +895,54 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { @Override public Cell getKeyValue() { - if (!isSeeked()) - return null; + if (!isSeeked()) return null; - if(currTagsLen > 0) { - KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset() - + blockBuffer.position(), getCellBufSize()); - if (this.reader.shouldIncludeMemstoreTS()) { - ret.setSequenceId(currMemstoreTS); + KeyValue ret = null; + if (currTagsLen > 0) { + if (this.curBlock.getMemoryType() == BlockCache.MemoryType.SHARED_MEM) { + ret = new SharedMemoryKeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position(), getCellBufSize()); + } else { + ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position(), getCellBufSize()); } - return ret; } else { - NoTagsKeyValue ret = new NoTagsKeyValue(blockBuffer.array(), - blockBuffer.arrayOffset() + blockBuffer.position(), getCellBufSize()); - if (this.reader.shouldIncludeMemstoreTS()) { - ret.setSequenceId(currMemstoreTS); + if (this.curBlock.getMemoryType() == BlockCache.MemoryType.SHARED_MEM) { + ret = new SharedMemoryNoTagsKeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position(), getCellBufSize()); + } else { + ret = new NoTagsKeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + + blockBuffer.position(), getCellBufSize()); } - return ret; + } + if (this.reader.shouldIncludeMemstoreTS()) { + ret.setSequenceId(currMemstoreTS); + } + return ret; + } + + private static class SharedMemoryKeyValue extends KeyValue implements SharedMemoryBackedCell { + public SharedMemoryKeyValue(byte[] bytes, int offset, int length) { + super(bytes, offset, length); + } + + @Override + public Cell copy() { + byte[] copy = Bytes.copy(this.bytes, this.offset, this.length); + return new KeyValue(copy, 0, copy.length); + } + } + + private static class SharedMemoryNoTagsKeyValue extends NoTagsKeyValue implements + SharedMemoryBackedCell { + public SharedMemoryNoTagsKeyValue(byte[] bytes, int offset, int length) { + super(bytes, offset, length); + } + + @Override + public Cell copy() { + byte[] copy = Bytes.copy(this.bytes, this.offset, this.length); + return new KeyValue(copy, 0, copy.length); } } @@ -872,7 +975,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } protected void setNonSeekedState() { - block = null; + reset(); blockBuffer = null; currKeyLen = 0; currValueLen = 0; @@ -892,7 +995,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { + "; currKeyLen = " + currKeyLen + "; currValLen = " + currValueLen + "; block limit = " + blockBuffer.limit() + "; HFile name = " + reader.getName() - + "; currBlock currBlockOffset = " + block.getOffset()); + + "; currBlock currBlockOffset = " + this.curBlock.getOffset()); throw e; } } @@ -905,7 +1008,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { private boolean positionForNextBlock() throws IOException { // Methods are small so they get inlined because they are 'hot'. 
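// [Editorial note, not part of the patch: with the new accounting, advancing to the next
// block goes through updateCurrentBlock(), which pushes the finished block onto prevBlocks
// rather than discarding it; the accumulated blocks are handed back to the cache via
// returnBlocks(), either per RPC batch (returnCurrentBatch) or when the scanner is closed.]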
long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset(); - if (block.getOffset() >= lastDataBlockOffset) { + if (this.curBlock.getOffset() >= lastDataBlockOffset) { setNonSeekedState(); return false; } @@ -920,7 +1023,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { setNonSeekedState(); return false; } - updateCurrBlock(nextBlock); + updateCurrentBlock(nextBlock); return true; } @@ -969,27 +1072,38 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { return false; } - long firstDataBlockOffset = - reader.getTrailer().getFirstDataBlockOffset(); - if (block != null && block.getOffset() == firstDataBlockOffset) { - blockBuffer.rewind(); - readKeyValueLen(); - return true; + long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset(); + if (curBlock != null + && curBlock.getOffset() == firstDataBlockOffset) { + return processFirstDataBlock(); } - block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, + readAndUpdateNewBlock(firstDataBlockOffset); + return true; + + } + + protected boolean processFirstDataBlock() throws IOException{ + blockBuffer.rewind(); + readKeyValueLen(); + return true; + } + + protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException, + CorruptHFileException { + HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); - if (block.getOffset() < 0) { - throw new IOException("Invalid block offset: " + block.getOffset()); + if (newBlock.getOffset() < 0) { + throw new IOException("Invalid block offset: " + newBlock.getOffset()); } - updateCurrBlock(block); - return true; + updateCurrentBlock(newBlock); } - + protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind, Cell key, boolean seekBefore) throws IOException { - if (block == null || block.getOffset() != seekToBlock.getOffset()) { - updateCurrBlock(seekToBlock); + if (this.curBlock == null + || this.curBlock.getOffset() != seekToBlock.getOffset()) { + updateCurrentBlock(seekToBlock); } else if (rewind) { blockBuffer.rewind(); } @@ -1012,10 +1126,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { */ protected final void checkKeyValueLen() { if (checkLen(this.currKeyLen) || checkLen(this.currValueLen)) { - throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen + - " or currValueLen " + this.currValueLen + ". Block offset: " + block.getOffset() + - ", block length: " + this.blockBuffer.limit() + ", position: " + - this.blockBuffer.position() + " (without header)."); + throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen + + " or currValueLen " + this.currValueLen + ". 
Block offset: " + + this.curBlock.getOffset() + ", block length: " + + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position() + + " (without header)."); } } @@ -1025,19 +1140,18 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { * * @param newBlock the block to make current */ - protected void updateCurrBlock(HFileBlock newBlock) { - block = newBlock; - + protected void updateCurrentBlock(HFileBlock newBlock) throws IOException { + // Set the active block on the reader // sanity check - if (block.getBlockType() != BlockType.DATA) { - throw new IllegalStateException("Scanner works only on data " + - "blocks, got " + block.getBlockType() + "; " + - "fileName=" + reader.getName() + ", " + - "dataBlockEncoder=" + reader.getDataBlockEncoding() + ", " + - "isCompaction=" + isCompaction); + if (newBlock.getBlockType() != BlockType.DATA) { + throw new IllegalStateException("ScannerV2 works only on data " + "blocks, got " + + curBlock.getBlockType() + "; " + "fileName=" + reader.getName() + + ", " + "dataBlockEncoder=" + reader.getDataBlockEncoding() + ", " + "isCompaction=" + + isCompaction); } - blockBuffer = block.getBufferWithoutHeader(); + addToCurrentBlock(newBlock); + blockBuffer = newBlock.getBufferWithoutHeader(); readKeyValueLen(); blockFetches++; @@ -1077,6 +1191,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { new KeyValue.KeyOnlyKeyValue(blockBuffer.array(), blockBuffer.arrayOffset() + blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen)); } + + @Override + public void returnCurrentBatch() { + this.returnBlocks(false); + } + } public Path getPath() { @@ -1130,7 +1250,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { */ private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock, boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType, - DataBlockEncoding expectedDataBlockEncoding) throws IOException { + DataBlockEncoding expectedDataBlockEncoding) throws IOException { // Check cache for block. If found return. if (cacheConf.isBlockCacheEnabled()) { BlockCache cache = cacheConf.getBlockCache(); @@ -1138,7 +1258,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { updateCacheMetrics); if (cachedBlock != null) { if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) { - cachedBlock = cachedBlock.unpack(hfileContext, fsBlockReader); + HFileBlock compressedBlock = cachedBlock; + cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader); + // In case of compressed block after unpacking we can return the compressed block + if (compressedBlock != cachedBlock) cache.returnBlock(cacheKey, compressedBlock); } validateBlockType(cachedBlock, expectedBlockType); @@ -1174,6 +1297,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { " because of a data block encoding mismatch" + "; expected: " + expectedDataBlockEncoding + ", actual: " + actualDataBlockEncoding); + // This is an error scenario. so here we need to decrement the + // count. 
+ cache.returnBlock(cacheKey, cachedBlock); cache.evictBlock(cacheKey); } return null; @@ -1191,7 +1317,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { * @throws IOException */ @Override - public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) + public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException { if (trailer.getMetaIndexCount() == 0) { return null; // there are no meta blocks @@ -1223,7 +1349,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { assert cachedBlock.isUnpacked() : "Packed block leak."; // Return a distinct 'shallow copy' of the block, // so pos does not get messed by the scanner - return cachedBlock.getBufferWithoutHeader(); + return cachedBlock; } // Cache Miss, please load. } @@ -1237,7 +1363,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { cacheConf.isInMemory(), this.cacheConf.isCacheDataInL1()); } - return metaBlock.getBufferWithoutHeader(); + return metaBlock; } } @@ -1434,7 +1560,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { @Override public boolean isSeeked(){ - return this.block != null; + return curBlock != null; + } + + public void setNonSeekedState() { + reset(); } /** @@ -1444,22 +1574,20 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { * @param newBlock the block to make current * @throws CorruptHFileException */ - private void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { - block = newBlock; - + @Override + protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException { // sanity checks - if (block.getBlockType() != BlockType.ENCODED_DATA) { - throw new IllegalStateException( - "EncodedScanner works only on encoded data blocks"); + if (newBlock.getBlockType() != BlockType.ENCODED_DATA) { + throw new IllegalStateException("EncodedScanner works only on encoded data blocks"); } - short dataBlockEncoderId = block.getDataBlockEncodingId(); + short dataBlockEncoderId = newBlock.getDataBlockEncodingId(); if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) { String encoderCls = dataBlockEncoder.getClass().getName(); throw new CorruptHFileException("Encoder " + encoderCls + " doesn't support data block encoding " + DataBlockEncoding.getNameFromId(dataBlockEncoderId)); } - + addToCurrentBlock(newBlock); seeker.setCurrentBuffer(getEncodedBuffer(newBlock)); blockFetches++; @@ -1476,31 +1604,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { DataBlockEncoding.ID_SIZE).slice(); return encodedBlock; } - + @Override - public boolean seekTo() throws IOException { - if (reader == null) { - return false; - } - - if (reader.getTrailer().getEntryCount() == 0) { - // No data blocks. 
- return false; - } - - long firstDataBlockOffset = - reader.getTrailer().getFirstDataBlockOffset(); - if (block != null && block.getOffset() == firstDataBlockOffset) { - seeker.rewind(); - return true; - } - - block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread, - isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding()); - if (block.getOffset() < 0) { - throw new IOException("Invalid block offset: " + block.getOffset()); - } - updateCurrentBlock(block); + protected boolean processFirstDataBlock() throws IOException { + seeker.rewind(); return true; } @@ -1508,10 +1615,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { public boolean next() throws IOException { boolean isValid = seeker.next(); if (!isValid) { - block = readNextDataBlock(); - isValid = block != null; + HFileBlock newBlock = readNextDataBlock(); + isValid = newBlock != null; if (isValid) { - updateCurrentBlock(block); + updateCurrentBlock(newBlock); + } else { + setNonSeekedState(); } } return isValid; @@ -1531,7 +1640,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { @Override public Cell getKeyValue() { - if (block == null) { + if (this.curBlock == null) { return null; } return seeker.getKeyValue(); @@ -1552,7 +1661,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } private void assertValidSeek() { - if (block == null) { + if (this.curBlock == null) { throw new NotSeekedException(); } } @@ -1561,9 +1670,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { return dataBlockEncoder.getFirstKeyInBlock(getEncodedBuffer(curBlock)); } + @Override protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey, boolean rewind, Cell key, boolean seekBefore) throws IOException { - if (block == null || block.getOffset() != seekToBlock.getOffset()) { + if (this.curBlock == null + || this.curBlock.getOffset() != seekToBlock.getOffset()) { updateCurrentBlock(seekToBlock); } else if (rewind) { seeker.rewind(); @@ -1644,6 +1755,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { HFileContextBuilder builder = new HFileContextBuilder() .withIncludesMvcc(shouldIncludeMemstoreTS()) .withHBaseCheckSum(true) + .withPathName(this.getName()) .withCompression(this.compressAlgo); // Check for any key material available diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java index 2b6e011..e84738b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.regionserver.BatchReturnableScanner; import org.apache.hadoop.hbase.Cell; /** @@ -37,7 +38,7 @@ import org.apache.hadoop.hbase.Cell; * getValue. */ @InterfaceAudience.Private -public interface HFileScanner { +public interface HFileScanner extends BatchReturnableScanner { /** * SeekTo or just before the passed cell. Examine the return * code to figure whether we found the cell or not. 
@@ -149,4 +150,9 @@ public interface HFileScanner { * @return the next key in the index (the key to seek to the next block) */ Cell getNextIndexedKey(); + + /** + * Does the required close operation on the current scanner + */ + void close(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java index 18dcbb0..961d7ae 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java @@ -34,13 +34,13 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantLock; -import com.google.common.base.Objects; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType; import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; @@ -49,6 +49,7 @@ import org.apache.hadoop.util.StringUtils; import org.codehaus.jackson.annotate.JsonIgnoreProperties; import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Objects; import com.google.common.util.concurrent.ThreadFactoryBuilder; /** @@ -1090,4 +1091,11 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize { public BlockCache[] getBlockCaches() { return null; } + + @Override + public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) { + assert block.getCacheType() == CacheType.L1_CACHED; + // Do nothing + return true; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java index 57e7f28..e60711a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java @@ -269,4 +269,10 @@ public class MemcachedBlockCache implements BlockCache { } } + @Override + public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) { + // Not handling reference counting + return true; + } + } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java index dfada87..9700704 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java @@ -43,6 +43,7 @@ import java.util.concurrent.ConcurrentMap; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; @@ -62,10 +63,12 @@ import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer; import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager; 
import org.apache.hadoop.hbase.io.hfile.CachedBlock; import org.apache.hadoop.hbase.io.hfile.HFileBlock; +import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType; import org.apache.hadoop.hbase.util.ConcurrentIndex; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.HasThread; import org.apache.hadoop.hbase.util.IdLock; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.util.StringUtils; import com.google.common.annotations.VisibleForTesting; @@ -417,8 +420,9 @@ public class BucketCache implements BlockCache, HeapSize { // existence here. if (bucketEntry.equals(backingMap.get(key))) { int len = bucketEntry.getLength(); - ByteBuffer bb = ByteBuffer.allocate(len); - int lenRead = ioEngine.read(bb, bucketEntry.offset()); + Pair pair = ioEngine.read(bucketEntry.offset(), len); + ByteBuffer bb = pair.getFirst(); + int lenRead = bb.limit(); if (lenRead != len) { throw new RuntimeException("Only " + lenRead + " bytes read, " + len + " expected"); } @@ -430,6 +434,15 @@ public class BucketCache implements BlockCache, HeapSize { cacheStats.hit(caching); cacheStats.ioHit(timeTaken); } + if (cachedBlock instanceof HFileBlock) { + ((HFileBlock) cachedBlock).setCacheType(CacheType.L2_CACHED); + ((HFileBlock) cachedBlock).setMemoryType(pair.getSecond()); + // Once we have HBAS-11425 we can increment/decrement the ref count + // only on the SHARED_MEM type + if (pair.getSecond() == MemoryType.SHARED_MEM) { + bucketEntry.refCount.incrementAndGet(); + } + } bucketEntry.access(accessCount.incrementAndGet()); if (this.ioErrorStartTime > 0) { ioErrorStartTime = -1; @@ -463,6 +476,9 @@ public class BucketCache implements BlockCache, HeapSize { @Override public boolean evictBlock(BlockCacheKey cacheKey) { + return evictBlock(cacheKey, false); + } + public boolean evictBlock(BlockCacheKey cacheKey, boolean checkInUse) { if (!cacheEnabled) { return false; } @@ -483,10 +499,31 @@ public class BucketCache implements BlockCache, HeapSize { IdLock.Entry lockEntry = null; try { lockEntry = offsetLock.getLockEntry(bucketEntry.offset()); - if (backingMap.remove(cacheKey, bucketEntry)) { - blockEvicted(cacheKey, bucketEntry, removedBlock == null); + int refCount = bucketEntry.refCount.get(); + if(refCount == 0) { + if (backingMap.remove(cacheKey, bucketEntry)) { + blockEvicted(cacheKey, bucketEntry, removedBlock == null); + } else { + return false; + } } else { - return false; + if(checkInUse) { + if (LOG.isDebugEnabled()) { + LOG.debug("This block " + cacheKey + " is still referred by " + refCount + + " readers. Can not be freed now"); + } + return false; + } else { + if (LOG.isDebugEnabled()) { + LOG.debug("This block " + + cacheKey + + " is still referred by " + + refCount + + " readers. Can not be freed now. Hence will mark this" + + " for evicting at a later point"); + } + bucketEntry.markedForEvict = true; + } } } catch (IOException ie) { LOG.warn("Failed evicting block " + cacheKey); @@ -1102,6 +1139,10 @@ public class BucketCache implements BlockCache, HeapSize { byte deserialiserIndex; private volatile long accessCounter; private BlockPriority priority; + // Set this when we were not able to forcefully evict the block + private volatile boolean markedForEvict; + private AtomicInteger refCount = new AtomicInteger(0); + /** * Time this block was cached. Presumes we are created just before we are added to the cache. 
*/ @@ -1193,9 +1234,12 @@ public class BucketCache implements BlockCache, HeapSize { public long free(long toFree) { Map.Entry entry; long freedBytes = 0; + // TODO avoid a cycling situation: we may find no block which is not in use and so no way to free. + // What to do then? Fail the caching attempt? Need some changes in the cacheBlock API? while ((entry = queue.pollLast()) != null) { - evictBlock(entry.getKey()); - freedBytes += entry.getValue().getLength(); + if (evictBlock(entry.getKey(), true)) { + freedBytes += entry.getValue().getLength(); + } if (freedBytes >= toFree) { return freedBytes; } @@ -1399,4 +1443,31 @@ public class BucketCache implements BlockCache, HeapSize { public BlockCache[] getBlockCaches() { return null; } + + @Override + public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) { + assert block.getCacheType() == CacheType.L2_CACHED; + // When the block is served from a RAMQueueEntry, its CacheType won't be set to L2 yet; it is + // still on its way into the L2 cache, so it behaves like NOT_FROM_CACHE. + BucketEntry bucketEntry = backingMap.get(cacheKey); + if (bucketEntry != null) { + if(block.getMemoryType() == MemoryType.SHARED_MEM) { + bucketEntry.refCount.decrementAndGet(); + } + if(bucketEntry.markedForEvict) { + evictBlock(cacheKey); + } + return true; + } + return false; + } + + @VisibleForTesting + public int getRefCount(BlockCacheKey cacheKey) { + BucketEntry bucketEntry = backingMap.get(cacheKey); + if (bucketEntry != null) { + return bucketEntry.refCount.get(); + } + return 0; + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java index de10667..4fe382f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java @@ -22,7 +22,9 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType; import org.apache.hadoop.hbase.util.ByteBufferArray; +import org.apache.hadoop.hbase.util.Pair; /** * IO engine that stores data in memory using an array of ByteBuffers @@ -65,17 +67,19 @@ public class ByteBufferIOEngine implements IOEngine { /** * Transfers data from the buffer array to the given byte buffer - * @param dstBuffer the given byte buffer into which bytes are to be written * @param offset The offset in the ByteBufferArray of the first byte to be * read + * @param length The length of the ByteBuffer that should be allocated for + * reading from the underlying ByteBufferArray. 

* @return number of bytes read * @throws IOException */ @Override - public int read(ByteBuffer dstBuffer, long offset) throws IOException { - assert dstBuffer.hasArray(); - return bufferArray.getMultiple(offset, dstBuffer.remaining(), dstBuffer.array(), + public Pair read(long offset, int length) throws IOException { + ByteBuffer dstBuffer = ByteBuffer.allocate(length); + bufferArray.getMultiple(offset, dstBuffer.remaining(), dstBuffer.array(), dstBuffer.arrayOffset()); + return new Pair(dstBuffer, MemoryType.SHARED_MEM); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java index 7b6b25f..394aec6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java @@ -26,6 +26,8 @@ import java.nio.channels.FileChannel; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType; +import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.util.StringUtils; /** @@ -79,14 +81,17 @@ public class FileIOEngine implements IOEngine { /** * Transfers data from file to the given byte buffer - * @param dstBuffer the given byte buffer into which bytes are to be written * @param offset The offset in the file where the first byte to be read + * @param length The length of buffer that should be allocated for reading + * from the file channel * @return number of bytes read * @throws IOException */ @Override - public int read(ByteBuffer dstBuffer, long offset) throws IOException { - return fileChannel.read(dstBuffer, offset); + public Pair read(long offset, int length) throws IOException { + ByteBuffer dstBuffer = ByteBuffer.allocate(length); + fileChannel.read(dstBuffer, offset); + return new Pair(dstBuffer, MemoryType.NON_SHARED_MEM); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java index 430c5af..61f479c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java @@ -22,6 +22,8 @@ import java.io.IOException; import java.nio.ByteBuffer; import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType; +import org.apache.hadoop.hbase.util.Pair; /** * A class implementing IOEngine interface supports data services for @@ -35,13 +37,13 @@ public interface IOEngine { boolean isPersistent(); /** - * Transfers data from IOEngine to the given byte buffer - * @param dstBuffer the given byte buffer into which bytes are to be written + * Transfers data from IOEngine to a byte buffer * @param offset The offset in the IO engine where the first byte to be read - * @return number of bytes read + * @param length How many bytes to be read from the offset + * @return Pair of ByteBuffer where data is read and its MemoryType * @throws IOException */ - int read(ByteBuffer dstBuffer, long offset) throws IOException; + Pair read(long offset, int length) throws IOException; /** * Transfers data from the given byte buffer to IOEngine diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java index 26ffa95..89603a6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java @@ -16,16 +16,15 @@ package org.apache.hadoop.hbase.ipc; * See the License for the specific language governing permissions and * limitations under the License. */ +import java.nio.ByteBuffer; import java.nio.channels.ClosedChannelException; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.ipc.RpcServer.Call; import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler; import org.apache.hadoop.hbase.monitoring.TaskMonitor; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.StringUtils; import org.apache.htrace.Trace; @@ -40,8 +39,6 @@ import com.google.protobuf.Message; */ @InterfaceAudience.Private public class CallRunner { - private static final Log LOG = LogFactory.getLog(CallRunner.class); - private Call call; private RpcServerInterface rpcServer; private MonitoredRPCHandler status; @@ -91,7 +88,7 @@ public class CallRunner { } Throwable errorThrowable = null; String error = null; - Pair resultPair = null; + Triple result = null; RpcServer.CurCall.set(call); TraceScope traceScope = null; try { @@ -103,8 +100,8 @@ public class CallRunner { traceScope = Trace.startSpan(call.toTraceString(), call.tinfo); } // make the call - resultPair = this.rpcServer.call(call.service, call.md, call.param, call.cellScanner, - call.timestamp, this.status); + result = this.rpcServer.call(call.service, call.md, call.param, call.cellScanner, + call.timestamp, this.status, call.connection.codec, call.connection.compressionCodec); } catch (Throwable e) { RpcServer.LOG.debug(Thread.currentThread().getName() + ": " + call.toShortString(), e); errorThrowable = e; @@ -117,7 +114,7 @@ public class CallRunner { traceScope.close(); } RpcServer.CurCall.set(null); - if (resultPair != null) { + if (result != null) { this.rpcServer.addCallSize(call.getSize() * -1); sucessful = true; } @@ -125,9 +122,10 @@ public class CallRunner { // Set the response for undelayed calls and delayed calls with // undelayed responses. if (!call.isDelayed() || !call.isReturnValueDelayed()) { - Message param = resultPair != null ? resultPair.getFirst() : null; - CellScanner cells = resultPair != null ? resultPair.getSecond() : null; - call.setResponse(param, cells, errorThrowable, error); + Message param = result != null ? result.getFirst() : null; + CellScanner cells = result != null ? result.getSecond() : null; + ByteBuffer cellBlocks = result != null ? 
result.getThird() : null; + call.setResponse(param, cells, errorThrowable, error, cellBlocks); } call.sendResponseIfReady(); this.status.markComplete("Sent response"); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java index 3557768..eb82e96 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java @@ -106,7 +106,7 @@ import org.apache.hadoop.hbase.security.UserProvider; import org.apache.hadoop.hbase.security.token.AuthenticationTokenSecretManager; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Counter; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Writable; @@ -184,7 +184,6 @@ public class RpcServer implements RpcServerInterface { private final int warnDelayedCalls; private AtomicInteger delayedCalls; - private final IPCUtil ipcUtil; private static final String AUTH_FAILED_FOR = "Auth failed for "; private static final String AUTH_SUCCESSFUL_FOR = "Auth successful for "; @@ -274,7 +273,7 @@ public class RpcServer implements RpcServerInterface { private UserProvider userProvider; private final BoundedByteBufferPool reservoir; - + private final IPCUtil ipcUtil; /** * Datastructure that holds all necessary to a method invocation and then afterward, carries @@ -377,7 +376,7 @@ public class RpcServer implements RpcServerInterface { } protected synchronized void setResponse(Object m, final CellScanner cells, - Throwable t, String errorMsg) { + Throwable t, String errorMsg, ByteBuffer cellBlocks) { if (this.isError) return; if (t != null) this.isError = true; BufferChain bc = null; @@ -403,12 +402,14 @@ public class RpcServer implements RpcServerInterface { // Set the exception as the result of the method invocation. headerBuilder.setException(exceptionBuilder.build()); } - // Pass reservoir to buildCellBlock. Keep reference to returne so can add it back to the + // Pass reservoir to buildCellBlock. Keep reference to return so can add it back to the // reservoir when finished. This is hacky and the hack is not contained but benefits are // high when we can avoid a big buffer allocation on each rpc. - this.cellBlock = ipcUtil.buildCellBlock(this.connection.codec, - this.connection.compressionCodec, cells, reservoir); + // The cellblock would have been already formed for this connection + this.cellBlock = (cellBlocks != null) ? cellBlocks : ipcUtil.buildCellBlock( + this.connection.codec, this.connection.compressionCodec, cells, reservoir); if (this.cellBlock != null) { + // Update the cellblock so that it is put back to the BoundedByteBufferPool CellBlockMeta.Builder cellBlockBuilder = CellBlockMeta.newBuilder(); // Presumes the cellBlock bytebuffer has been flipped so limit has total size in it. 
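// [Editorial note, not part of the patch: on the new path the cell block arrives here
// pre-built; RpcServer.call() hands it through the PayloadCarryingRpcController and
// CallRunner passes it in as the cellBlocks argument, so this method only measures and
// wraps it. The ipcUtil.buildCellBlock() fallback covers callers that still supply a
// CellScanner.]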
cellBlockBuilder.setLength(this.cellBlock.limit()); @@ -462,7 +463,7 @@ public class RpcServer implements RpcServerInterface { this.delayResponse = false; delayedCalls.decrementAndGet(); if (this.delayReturnValue) { - this.setResponse(result, null, null, null); + this.setResponse(result, null, null, null, null); } this.responder.doRespond(this); } @@ -485,7 +486,7 @@ public class RpcServer implements RpcServerInterface { @Override public synchronized void endDelayThrowing(Throwable t) throws IOException { - this.setResponse(null, null, t, StringUtils.stringifyException(t)); + this.setResponse(null, null, t, StringUtils.stringifyException(t), null); this.delayResponse = false; this.sendResponseIfReady(); } @@ -1208,11 +1209,11 @@ public class RpcServer implements RpcServerInterface { /** * Codec the client asked use. */ - private Codec codec; + protected Codec codec; /** * Compression codec the client asked us use. */ - private CompressionCodec compressionCodec; + protected CompressionCodec compressionCodec; BlockingService service; protected UserGroupInformation user = null; private AuthMethod authMethod; @@ -1449,7 +1450,7 @@ public class RpcServer implements RpcServerInterface { WritableUtils.writeString(out, errorClass); WritableUtils.writeString(out, error); } - saslCall.setSaslTokenResponse(saslResponse.getByteBuffer()); + saslCall.setSaslTokenResponse(saslResponse.flipAndReturnBuffer()); saslCall.responder = responder; saslCall.sendResponseIfReady(); } finally { @@ -1816,8 +1817,8 @@ public class RpcServer implements RpcServerInterface { offset += paramSize; } if (header.hasCellBlockMeta()) { - cellScanner = ipcUtil.createCellScanner(this.codec, this.compressionCodec, - buf, offset, buf.length); + cellScanner = ipcUtil.createCellScanner(this.codec, this.compressionCodec, buf, offset, + buf.length); } } catch (Throwable t) { String msg = getListenerAddress() + " is unable to read call parameter from client " + @@ -1989,7 +1990,6 @@ public class RpcServer implements RpcServerInterface { this.warnDelayedCalls = conf.getInt(WARN_DELAYED_CALLS, DEFAULT_WARN_DELAYED_CALLS); this.delayedCalls = new AtomicInteger(0); - this.ipcUtil = new IPCUtil(conf); // Create the responder here @@ -2002,6 +2002,7 @@ public class RpcServer implements RpcServerInterface { } this.scheduler = scheduler; this.scheduler.init(new RpcSchedulerContext(this)); + ipcUtil = IPCUtil.createInstance(this.conf); } /** @@ -2023,7 +2024,7 @@ public class RpcServer implements RpcServerInterface { private void setupResponse(ByteArrayOutputStream response, Call call, Throwable t, String error) throws IOException { if (response != null) response.reset(); - call.setResponse(null, null, t, error); + call.setResponse(null, null, t, error, null); } protected void closeConnection(Connection connection) { @@ -2100,10 +2101,23 @@ public class RpcServer implements RpcServerInterface { * the return response has protobuf response payload. On failure, the * exception name and the stack trace are returned in the protobuf response. */ + // Used by tests only + protected Triple call(BlockingService service, + MethodDescriptor md, Message param, CellScanner cellScanner, long receiveTime, + MonitoredRPCHandler status) throws IOException { + return call(service, md, param, cellScanner, receiveTime, status, null, null); + } + + /** + * This is a server side method, which is invoked over RPC. On success + * the return response has protobuf response payload. 
On failure, the + * exception name and the stack trace are returned in the protobuf response. + */ @Override - public Pair call(BlockingService service, MethodDescriptor md, - Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status) - throws IOException { + public Triple call(BlockingService service, + MethodDescriptor md, Message param, CellScanner cellScanner, long receiveTime, + MonitoredRPCHandler status, Codec codec, CompressionCodec compressionCodec) + throws IOException { try { status.setRPC(md.getName(), new Object[]{param}, receiveTime); // TODO: Review after we add in encoded data blocks. @@ -2112,6 +2126,11 @@ public class RpcServer implements RpcServerInterface { //get an instance of the method arg type long startTime = System.currentTimeMillis(); PayloadCarryingRpcController controller = new PayloadCarryingRpcController(cellScanner); + // see if we can go with constructors or setters + controller.setReservoir(reservoir); + controller.setCodec(codec); + controller.setCompressionCodec(compressionCodec); + Message result = service.callBlockingMethod(md, controller, param); long endTime = System.currentTimeMillis(); int processingTime = (int) (endTime - startTime); @@ -2141,7 +2160,8 @@ public class RpcServer implements RpcServerInterface { status.getClient(), startTime, processingTime, qTime, responseSize); } - return new Pair(result, controller.cellScanner()); + return new Triple(result, controller.cellScanner(), + controller.getCellBlock()); } catch (Throwable e) { // The above callBlockingMethod will always return a SE. Strip the SE wrapper before // putting it on the wire. Its needed to adhere to the pb Service Interface but we don't diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java index ab8b485..b6ae6aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java @@ -21,13 +21,16 @@ package org.apache.hadoop.hbase.ipc; import java.io.IOException; import java.net.InetSocketAddress; +import java.nio.ByteBuffer; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; +import org.apache.hadoop.hbase.codec.Codec; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler; -import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; +import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.security.authorize.PolicyProvider; import com.google.common.annotations.VisibleForTesting; @@ -48,9 +51,10 @@ public interface RpcServerInterface { void setSocketSendBufSize(int size); InetSocketAddress getListenerAddress(); - Pair call(BlockingService service, MethodDescriptor md, - Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status) - throws IOException, ServiceException; + Triple call(BlockingService service, MethodDescriptor md, + Message param, + CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status, + Codec codec, CompressionCodec compressionCodec) throws IOException, ServiceException; void setErrorHandler(HBaseRPCErrorHandler handler); HBaseRPCErrorHandler getErrorHandler(); diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java index 34c749e..55419d3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java @@ -124,6 +124,11 @@ public class DefaultOperationQuota implements OperationQuota { } @Override + public void addOperationSize(OperationType type, long size) { + avgOpSize.addOperationSize(type, size); + } + + @Override public long getAvgOperationSize(OperationType type) { return avgOpSize.getAvgOperationSize(type); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java index e67c7c0..82c6568 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java @@ -81,4 +81,9 @@ class NoopOperationQuota implements OperationQuota { public long getAvgOperationSize(OperationType type) { return -1; } + + @Override + public void addOperationSize(OperationType type, long size) { + // no-op + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java index b885ac9..6b4ffb1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java @@ -125,4 +125,10 @@ public interface OperationQuota { /** @return the average data size of the specified operation */ long getAvgOperationSize(OperationType type); + + /** + * Add a result from scan/get. This will be used to calculate the exact quota and + * have a better short-read average size for the next time. + */ + public void addOperationSize(OperationType type, long size); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BatchReturnableScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BatchReturnableScanner.java new file mode 100644 index 0000000..09d80d1 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BatchReturnableScanner.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +/** + * This interface denotes that the scanner implementing this interface + * allows to return the blocks involved in a scanner, as a batch + */ +@InterfaceAudience.Private +public interface BatchReturnableScanner { + + /** + * Allows the blocks being used in the current batch to be returned + */ + public void returnCurrentBatch(); +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/GetContext.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/GetContext.java new file mode 100644 index 0000000..ba74b80 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/GetContext.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController; +/** + * A context object to be used with Gets. 
+ */ +@InterfaceAudience.Private +public class GetContext { + private PayloadCarryingRpcController rpcController; + + public GetContext(PayloadCarryingRpcController rpcController) { + this.rpcController = rpcController; + } + + public void setCellsCountInBlock(int numberOfCells) { + this.rpcController.setCellsCountInBlock(numberOfCells); + } + + public int getCellsCountInBlock() { + return this.rpcController.getCellsCountInBlock(); + } + + public void setCellBlock(final ArrayList cells, int totalCellsSize) throws IOException { + this.rpcController.setCellBlock(cells, totalCellsSize); + } + +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index ced6ccb..9c0f790 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -34,6 +34,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.ListIterator; import java.util.Map; import java.util.Map.Entry; import java.util.NavigableMap; @@ -88,6 +89,7 @@ import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.NamespaceDescriptor; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.RegionTooBusyException; +import org.apache.hadoop.hbase.SharedMemoryBackedCell; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.TagType; @@ -2385,7 +2387,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi ////////////////////////////////////////////////////////////////////////////// @Override - public Result getClosestRowBefore(final byte [] row, final byte [] family) throws IOException { + public Result getClosestRowBefore(final byte[] row, final byte[] family) throws IOException { if (coprocessorHost != null) { Result result = new Result(); if (coprocessorHost.preGetClosestRowBefore(row, family, result)) { @@ -2418,11 +2420,16 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public RegionScanner getScanner(Scan scan) throws IOException { - return getScanner(scan, null); + return getScanner(scan, null, false); } - protected RegionScanner getScanner(Scan scan, - List additionalScanners) throws IOException { + public RegionScanner getScanner(Scan scan, boolean isCellBlockSupported) + throws IOException { + return getScanner(scan, null, isCellBlockSupported); + } + + protected RegionScanner getScanner(Scan scan, List additionalScanners, + boolean isCellBlockSupported) throws IOException { startRegionOperation(Operation.SCAN); try { // Verify families are all valid @@ -2436,21 +2443,21 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi checkFamily(family); } } - return instantiateRegionScanner(scan, additionalScanners); + return instantiateRegionScanner(scan, additionalScanners, isCellBlockSupported); } finally { closeRegionOperation(Operation.SCAN); } } protected RegionScanner instantiateRegionScanner(Scan scan, - List additionalScanners) throws IOException { + List additionalScanners, boolean isCellBlockSupported) throws IOException { if (scan.isReversed()) { if (scan.getFilter() != null) { scan.getFilter().setReversed(true); } - return new ReversedRegionScannerImpl(scan, additionalScanners, this); + return new ReversedRegionScannerImpl(scan, 
additionalScanners, this, isCellBlockSupported); } - return new RegionScannerImpl(scan, additionalScanners, this); + return new RegionScannerImpl(scan, additionalScanners, this, isCellBlockSupported); } @Override @@ -5189,7 +5196,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi /** * RegionScannerImpl is used to combine scanners from multiple Stores (aka column families). */ - class RegionScannerImpl implements RegionScanner { + class RegionScannerImpl implements RegionScanner, BatchReturnableScanner { // Package local for testability KeyValueHeap storeHeap = null; /** Heap of key-values that are not essential for the provided filters and are thus read @@ -5205,6 +5212,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi protected final byte[] stopRow; protected final HRegion region; protected final CellComparator comparator; + protected boolean copyCellsFromSharedMem; private final long readPt; private final long maxResultSize; @@ -5216,8 +5224,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return region.getRegionInfo(); } - RegionScannerImpl(Scan scan, List additionalScanners, HRegion region) - throws IOException { + public void setCopyCellsFromSharedMem(boolean copyCells) { + this.copyCellsFromSharedMem = copyCells; + } + + RegionScannerImpl(Scan scan, List additionalScanners, HRegion region, + boolean isCellBlockSupported) throws IOException { this.region = region; this.maxResultSize = scan.getMaxResultSize(); if (scan.hasFilter()) { @@ -5226,6 +5238,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.filter = null; } this.comparator = region.getCellCompartor(); + this.copyCellsFromSharedMem = (isCellBlockSupported == false); /** * By default, calls to next/nextRaw must enforce the batch limit. Thus, construct a default @@ -5301,6 +5314,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi return this.defaultScannerContext.getBatchLimit(); } + @Override + public void returnCurrentBatch() { + this.storeHeap.returnCurrentBatch(); + if (joinedHeap != null) { + this.joinedHeap.returnCurrentBatch(); + } + } + /** * Reset both the filter and the old filter. * @@ -5348,24 +5369,50 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // scanner is closed throw new UnknownScannerException("Scanner was closed"); } - boolean moreValues; - if (outResults.isEmpty()) { - // Usually outResults is empty. This is true when next is called - // to handle scan or get operation. - moreValues = nextInternal(outResults, scannerContext); - } else { - List tmpList = new ArrayList(); - moreValues = nextInternal(tmpList, scannerContext); - outResults.addAll(tmpList); - } + boolean moreValues = false; + try { + if (outResults.isEmpty()) { + // Usually outResults is empty. This is true when next is called + // to handle scan or get operation. + moreValues = nextInternal(outResults, scannerContext); + } else { + List tmpList = new ArrayList(); + moreValues = nextInternal(tmpList, scannerContext); + outResults.addAll(tmpList); + } - // If the size limit was reached it means a partial Result is being returned. Returning a - // partial Result means that we should not reset the filters; filters should only be reset in - // between rows - if (!scannerContext.partialResultFormed()) resetFilters(); + // If the size limit was reached it means a partial Result is being + // returned. 
Returning a + // partial Result means that we should not reset the filters; filters + // should only be reset in + // between rows + if (!scannerContext.partialResultFormed()) + resetFilters(); - if (isFilterDoneInternal()) { - moreValues = false; + if (isFilterDoneInternal()) { + moreValues = false; + } + + // If copyCellsFromSharedMem = true, then we need to copy the cells. Otherwise nextRaw() is + // coming from RSRpcServices#scan() and we will make the cellBlock immediately and also do + // finalizeScan() + if (copyCellsFromSharedMem && !outResults.isEmpty()) { + // Do the copy of the results here. + ListIterator listItr = outResults.listIterator(); + Cell cell = null; + while (listItr.hasNext()) { + cell = listItr.next(); + if (cell instanceof SharedMemoryBackedCell) { + listItr.set(((SharedMemoryBackedCell) cell).copy()); + } + } + } + } finally { + if (copyCellsFromSharedMem) { + // In case of copyCellsFromSharedMem==true (where the CPs wrap a scanner) we return + // the blocks then and there (for wrapped CPs) + this.returnCurrentBatch(); + } } return moreValues; } @@ -5757,6 +5804,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public synchronized void close() { + // The close would return all the blocks if (storeHeap != null) { storeHeap.close(); storeHeap = null; @@ -6366,6 +6414,54 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public Result get(final Get get) throws IOException { + prepareGet(get); + List results = get(get, true); + boolean stale = this.getRegionInfo().getReplicaId() != 0; + return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale); + } + + /** + * Do a get based on the get parameter. When this API is used, the row data + * will be sent back via controller's payload. + * + * @param get + * @param getCtxt - the GetContext which would internally set the cell block on the + * RpcController. + * @throws IOException + */ + public void get(final Get get, final GetContext getCtxt) + throws IOException { + prepareGet(get); + RegionScanner scanner = null; + try { + ArrayList results = new ArrayList(); + if (coprocessorHost != null) { + if (coprocessorHost.preGet(get, results)) { + // pre hook is bypassing actual get() operation + setResultInPayload(results, getCtxt, get.isCheckExistenceOnly()); + return; + } + } + + Scan scan = new Scan(get); + scanner = getScanner(scan, true); + scanner.next(results); + + // post-get CP hook + if (coprocessorHost != null) { + coprocessorHost.postGet(get, results); + } + // do after lock + metricsUpdate(results);// TODO 2 times interate over cells List . Avoid. + setResultInPayload(results, getCtxt, get.isCheckExistenceOnly()); + } finally { + if (scanner != null) { + scanner.close(); + } + } + } + + private void prepareGet(final Get get) throws IOException, NoSuchColumnFamilyException { checkRow(get.getRow(), "Get"); // Verify families are all valid if (get.hasFamilies()) { @@ -6377,21 +6473,35 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi get.addFamily(family); } } - List results = get(get, true); - boolean stale = this.getRegionInfo().getReplicaId() != 0; - return Result.create(results, get.isCheckExistenceOnly() ? 
!results.isEmpty() : null, stale); + } + + private void setResultInPayload(ArrayList results, GetContext ctxt, + boolean existsOnly) throws IOException { + // We return the result not as a CellScanner but as a cellblock ByteBuffer which + // is ready to be sent + // as the response to the client. Only then can we allow the L2 cache block memory + // to be evicted. + ctxt.setCellsCountInBlock(results.size()); + if (!existsOnly) { + int totalCellSize = 0; + for (int i = 0; i < results.size(); i++) { + totalCellSize += KeyValueUtil.length(results.get(i)); + // TODO no need to consider tags length? Maybe it is better to use the Encoder + // way to find the size. + } + ctxt.setCellBlock(results, totalCellSize); + } } @Override public List get(Get get, boolean withCoprocessor) throws IOException { - List results = new ArrayList(); // pre-get CP hook if (withCoprocessor && (coprocessorHost != null)) { - if (coprocessorHost.preGet(get, results)) { - return results; - } + if (coprocessorHost.preGet(get, results)) { + return results; + } } Scan scan = new Scan(get); @@ -6409,17 +6519,22 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi if (withCoprocessor && (coprocessorHost != null)) { coprocessorHost.postGet(get, results); } - // do after lock + metricsUpdate(results); + + return results; + } + + private void metricsUpdate(List results) { if (this.metricsRegion != null) { long totalSize = 0L; for (Cell cell : results) { + // This should give an estimate of the cell size in the result. Why do we need + // to know how the codec serializes it?? totalSize += CellUtil.estimatedSerializedSizeOf(cell); } this.metricsRegion.updateGet(totalSize); } - - return results; } public void mutateRow(RowMutations rm) throws IOException { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java index 9220d07..7114c13 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java @@ -21,8 +21,11 @@ package org.apache.hadoop.hbase.regionserver; import java.io.IOException; import java.util.Comparator; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.PriorityQueue; +import java.util.Set; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; @@ -45,6 +48,10 @@ import org.apache.hadoop.hbase.regionserver.ScannerContext.NextState; public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner implements KeyValueScanner, InternalScanner { protected PriorityQueue heap = null; + // Holds the scanners whenever an eager close() happens. All such eagerly closed + // scanners are collected, and the actual close is performed when the final + // scanner.close() happens. + protected Set scannersForDelayedClose = new HashSet(); /** * The current sub-scanner, i.e.
the one that contains the next key/value @@ -87,7 +94,7 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner if (scanner.peek() != null) { this.heap.add(scanner); } else { - scanner.close(); + this.scannersForDelayedClose.add(scanner); } } this.current = pollRealKV(); @@ -108,7 +115,8 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner Cell kvReturn = this.current.next(); Cell kvNext = this.current.peek(); if (kvNext == null) { - this.current.close(); + // Add the current scanner here instead of closing it. + this.scannersForDelayedClose.add(this.current); this.current = pollRealKV(); } else { KeyValueScanner topScanner = this.heap.peek(); @@ -154,7 +162,8 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner */ if (pee == null || !moreCells) { - this.current.close(); + // add the scanner that is to be closed + this.scannersForDelayedClose.add(this.current); } else { this.heap.add(this.current); } @@ -210,6 +219,10 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner } public void close() { + for (KeyValueScanner scanner : this.scannersForDelayedClose) { + scanner.close(); + } + this.scannersForDelayedClose.clear(); if (this.current != null) { this.current.close(); } @@ -311,7 +324,7 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner } if (!seekResult) { - scanner.close(); + this.scannersForDelayedClose.add(scanner); } else { heap.add(scanner); } @@ -364,12 +377,12 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner } else { // Close the scanner because we did a real seek and found out there // are no more KVs. - kvScanner.close(); + this.scannersForDelayedClose.add(kvScanner); } } else { // Close the scanner because it has already run out of KVs even before // we had to do a real seek on it. - kvScanner.close(); + this.scannersForDelayedClose.add(kvScanner); } kvScanner = heap.poll(); } @@ -398,4 +411,31 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner // here we return the next index key from the top scanner return current == null ? null : current.getNextIndexedKey(); } + + @Override + public void returnCurrentBatch() { + if(current != null) { + if(current instanceof BatchReturnableScanner) { + ((BatchReturnableScanner) current).returnCurrentBatch(); + } + } + if(heap != null) { + Iterator iterator = heap.iterator(); + while (iterator.hasNext()) { + KeyValueScanner scanner = iterator.next(); + if (scanner instanceof BatchReturnableScanner) { + ((BatchReturnableScanner) scanner).returnCurrentBatch(); + } + } + } + if (scannersForDelayedClose != null) { + Iterator iterator = scannersForDelayedClose.iterator(); + while (iterator.hasNext()) { + KeyValueScanner scanner = iterator.next(); + if (scanner instanceof BatchReturnableScanner) { + ((BatchReturnableScanner) scanner).returnCurrentBatch(); + } + } + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java index 76a9d0f..56a2f85 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java @@ -29,7 +29,7 @@ import org.apache.hadoop.hbase.client.Scan; * Scanner that returns the next KeyValue. 
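The BatchReturnableScanner type that KeyValueHeap#returnCurrentBatch() above delegates to, and that KeyValueScanner extends below, is a new file in this patch (BatchReturnableScanner.java, 33 lines in the diffstat) whose body is not part of this excerpt. Judging only from its call sites, it appears to be a one-method contract along these lines; the javadoc wording is an assumption:

package org.apache.hadoop.hbase.regionserver;

import org.apache.hadoop.hbase.classification.InterfaceAudience;

/**
 * Sketch of the contract implied by the call sites in this patch: a scanner that can
 * release ("return") the cache-block references backing the batch of cells it last
 * handed out, without closing the scanner itself.
 */
@InterfaceAudience.Private
public interface BatchReturnableScanner {
  /**
   * Return the blocks backing the current batch of cells so that their reference
   * counts are decremented and the blocks become evictable again.
   */
  void returnCurrentBatch();
}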
*/ @InterfaceAudience.Private -public interface KeyValueScanner { +public interface KeyValueScanner extends BatchReturnableScanner { /** * Look at the next Cell in this scanner, but do not iterate scanner. * @return the next Cell diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java index 957f417..da8024a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java @@ -71,4 +71,8 @@ public abstract class NonLazyKeyValueScanner implements KeyValueScanner { public Cell getNextIndexedKey() { return null; } + + @Override + public void returnCurrentBatch() { + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index aedd351..eb460df 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -153,7 +153,9 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor; import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor; import org.apache.hadoop.hbase.quotas.OperationQuota; +import org.apache.hadoop.hbase.quotas.OperationQuota.OperationType; import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager; +import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl; import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException; import org.apache.hadoop.hbase.regionserver.Region.FlushResult; import org.apache.hadoop.hbase.regionserver.Region.Operation; @@ -287,7 +289,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler, if (region != null && region.getCoprocessorHost() != null) { region.getCoprocessorHost().preScannerClose(s); } - s.close(); if (region != null && region.getCoprocessorHost() != null) { region.getCoprocessorHost().postScannerClose(s); @@ -364,8 +365,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler, return context != null && context.isClientCellBlockSupport(); } - private void addResult(final MutateResponse.Builder builder, - final Result result, final PayloadCarryingRpcController rpcc) { + private void addResult(final MutateResponse.Builder builder, final Result result, + final PayloadCarryingRpcController rpcc) throws ServiceException { if (result == null) return; if (isClientCellBlockSupport()) { builder.setResult(ProtobufUtil.toResultNoData(result)); @@ -377,16 +378,18 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } private void addResults(final ScanResponse.Builder builder, final List results, - final RpcController controller, boolean isDefaultRegion) { + final RpcController controller, boolean isDefaultRegion) throws IOException { builder.setStale(!isDefaultRegion); if (results == null || results.isEmpty()) return; - if (isClientCellBlockSupport()) { + // TODO we can avoid this isClientCellBlockSupport() every time. 
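The server-side changes that follow lean on new payload plumbing in PayloadCarryingRpcController (+72 lines in the diffstat, not shown in this excerpt): addResults() calls setCellBlock(...), multi()/mutate() call clearPayload(), and the Get path reads getCellBlock() and getCellsCountInBlock(). A rough sketch of those additions, inferred purely from the call sites (the field types, the extra setter, and the javadoc are assumptions, not the real class body):

import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.CellScanner;

/** Sketch only; mirrors the call shapes used in RSRpcServices, not the real class. */
public class PayloadCarryingRpcControllerSketch {
  private CellScanner cellScanner;   // cells carried alongside the request/response
  private ByteBuffer cellBlock;      // assumed: an already-encoded cell block ready to send
  private int cellsCountInBlock;     // assumed: number of cells encoded in cellBlock

  /** Existing accessor, still used by multi()/mutate() to pick up the request cells. */
  public CellScanner cellScanner() {
    return cellScanner;
  }

  /** Used by addResults() to attach scan results as the response payload. */
  public void setCellBlock(CellScanner cells) {
    this.cellScanner = cells;
  }

  /** Assumed setter for the Get path, where GetContext hands over a pre-encoded block. */
  public void setCellBlock(ByteBuffer block, int cellCount) {
    this.cellBlock = block;
    this.cellsCountInBlock = cellCount;
  }

  /** Read by the quota accounting in get(): addOperationSize(GET, getCellBlock().limit()). */
  public ByteBuffer getCellBlock() {
    return cellBlock;
  }

  /** Read by get() to fill associatedCellCount on the protobuf Result when no Result was built. */
  public int getCellsCountInBlock() {
    return cellsCountInBlock;
  }

  /** Used by multi()/mutate() so the request payload is not held across the call. */
  public void clearPayload() {
    this.cellScanner = null;
    this.cellBlock = null;
    this.cellsCountInBlock = 0;
  }
}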
+ if (isClientCellBlockSupport() && controller instanceof PayloadCarryingRpcController) { for (Result res : results) { builder.addCellsPerResult(res.size()); builder.addPartialFlagPerResult(res.isPartial()); } - ((PayloadCarryingRpcController)controller). - setCellScanner(CellUtil.createCellScanner(results)); + // Set cellBlock as the payload. + ((PayloadCarryingRpcController) controller). + setCellBlock(CellUtil.createCellScanner(results)); } else { for (Result res: results) { ClientProtos.Result pbr = ProtobufUtil.toResult(res); @@ -562,11 +565,13 @@ public class RSRpcServices implements HBaseRPCErrorHandler, * @param builder * @param cellsToReturn Could be null. May be allocated in this method. This is what this * method returns as a 'result'. + * @param controller * @return Return the cellScanner passed */ private List doNonAtomicRegionMutation(final Region region, final OperationQuota quota, final RegionAction actions, final CellScanner cellScanner, - final RegionActionResult.Builder builder, List cellsToReturn, long nonceGroup) { + final RegionActionResult.Builder builder, List cellsToReturn, long nonceGroup, + PayloadCarryingRpcController controller) { // Gather up CONTIGUOUS Puts and Deletes in this mutations List. Idea is that rather than do // one at a time, we instead pass them in batch. Be aware that the corresponding // ResultOrException instance that matches each Put or Delete is then added down in the @@ -578,6 +583,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, Result r = null; if (action.hasGet()) { Get get = ProtobufUtil.toGet(action.getGet()); + // TODO : Handle cases where for multi() also we could create a cellblock r = region.get(get); } else if (action.hasServiceCall()) { resultOrExceptionBuilder = ResultOrException.newBuilder(); @@ -1873,6 +1879,9 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } byte[] row = get.getRow().toByteArray(); byte[] family = get.getColumn(0).getFamily().toByteArray(); + // Always go with PB mode only and do not go with the Payload mode. + // This is because we need the result in the postGetClosedRowhook which + // we cannot get in case of Payload based result. 
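GetContext, used by the Get branch immediately below (and by HRegion#get(Get, GetContext) earlier in this patch), is another new file from the diffstat (GetContext.java, 49 lines) whose body is not shown here. From its constructor and the methods called on it, it is roughly the following; the encoding step is elided and the wiring shown is an assumption:

package org.apache.hadoop.hbase.regionserver;

import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;

/** Sketch of GetContext as implied by HRegion#get(Get, GetContext) and RSRpcServices#get. */
public class GetContextSketch {
  private final PayloadCarryingRpcController controller;
  private int cellsCountInBlock;

  public GetContextSketch(PayloadCarryingRpcController controller) {
    this.controller = controller;
  }

  /** Records how many cells the result carries; also consulted for exists-only Gets. */
  public void setCellsCountInBlock(int count) {
    this.cellsCountInBlock = count;
  }

  public int getCellsCountInBlock() {
    return cellsCountInBlock;
  }

  /**
   * Presumably encodes the result cells into a cell block of roughly totalCellSize bytes
   * and attaches it to the controller as the response payload; the encoding itself is
   * not reproduced in this sketch.
   */
  public void setCellBlock(List<Cell> results, int totalCellSize) {
    // Intentionally left empty in this sketch.
  }
}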
r = region.getClosestRowBefore(row, family); } else { Get clientGet = ProtobufUtil.toGet(get); @@ -1880,9 +1889,22 @@ public class RSRpcServices implements HBaseRPCErrorHandler, existence = region.getCoprocessorHost().preExists(clientGet); } if (existence == null) { - r = region.get(clientGet); + GetContext getCtxt = null; + if (controller instanceof PayloadCarryingRpcController && isClientCellBlockSupport()) { + getCtxt = new GetContext( + (PayloadCarryingRpcController) controller); + ((HRegion) region).get(clientGet, getCtxt); + } else { + r = region.get(clientGet); + } if (get.getExistenceOnly()) { - boolean exists = r.getExists(); + boolean exists = false; + if (r == null && getCtxt != null) { + exists = + getCtxt.getCellsCountInBlock() != 0; + } else { + exists = r.getExists(); + } if (region.getCoprocessorHost() != null) { exists = region.getCoprocessorHost().postExists(clientGet, exists); } @@ -1890,16 +1912,32 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } } } - if (existence != null){ - ClientProtos.Result pbr = - ProtobufUtil.toResult(existence, region.getRegionInfo().getReplicaId() != 0); + if (existence != null) { + ClientProtos.Result pbr = ProtobufUtil.toResult(existence, region.getRegionInfo() + .getReplicaId() != 0); builder.setResult(pbr); } else if (r != null) { + // don't copy the results here because already we have done the copy ClientProtos.Result pbr = ProtobufUtil.toResult(r); builder.setResult(pbr); + } else { + ClientProtos.Result.Builder pbr = ClientProtos.Result.newBuilder(); + pbr.setAssociatedCellCount(((PayloadCarryingRpcController) controller) + .getCellsCountInBlock()); + boolean stale = region.getRegionInfo().getReplicaId() != 0; + pbr.setStale(stale); + builder.setResult(pbr); } + if (r != null) { quota.addGetResult(r); + } else { + // When result is null, the result cell bytes are set as payload in controller + if (controller != null + && ((PayloadCarryingRpcController) controller).getCellBlock() != null) { + quota.addOperationSize(OperationType.GET, ((PayloadCarryingRpcController) controller) + .getCellBlock().limit()); + } } return builder.build(); } catch (IOException ie) { @@ -1935,7 +1973,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, // It is also the conduit via which we pass back data. PayloadCarryingRpcController controller = (PayloadCarryingRpcController)rpcc; CellScanner cellScanner = controller != null ? controller.cellScanner(): null; - if (controller != null) controller.setCellScanner(null); + if (controller != null) controller.clearPayload(); long nonceGroup = request.hasNonceGroup() ? request.getNonceGroup() : HConstants.NO_NONCE; @@ -1992,7 +2030,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } else { // doNonAtomicRegionMutation manages the exception internally cellsToReturn = doNonAtomicRegionMutation(region, quota, regionAction, cellScanner, - regionActionResultBuilder, cellsToReturn, nonceGroup); + regionActionResultBuilder, cellsToReturn, nonceGroup, controller); } responseBuilder.addRegionActionResult(regionActionResultBuilder.build()); quota.close(); @@ -2021,7 +2059,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, CellScanner cellScanner = controller != null? controller.cellScanner(): null; OperationQuota quota = null; // Clear scanner so we are not holding on to reference across call. 
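The cell copy done in RegionScannerImpl#nextRaw earlier in this patch, and the setCopyCellsFromSharedMem(true) wiring in scan() below, both revolve around the new SharedMemoryBackedCell type (37 lines in the diffstat, not included in this excerpt). From the instanceof check and the copy() call, it looks roughly like the marker interface below; that it extends Cell and that copy() returns a Cell are inferences, not confirmed by the excerpt:

package org.apache.hadoop.hbase;

/**
 * Sketch of the marker interface implied by RegionScannerImpl#nextRaw: a Cell whose
 * bytes live in shared memory (an L2 cache block), which must be deep-copied before
 * the backing block can be returned to the cache.
 */
public interface SharedMemoryBackedCell extends Cell {
  /** Deep-copy this cell onto the heap so it no longer references the cached block. */
  Cell copy();
}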
- if (controller != null) controller.setCellScanner(null); + if (controller != null) controller.clearPayload(); try { checkOpen(); requestCount.increment(); @@ -2174,6 +2212,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler, boolean moreResults = true; boolean closeScanner = false; boolean isSmallScan = false; + RegionScanner actualRegionScanner = null; ScanResponse.Builder builder = ScanResponse.newBuilder(); if (request.hasCloseScanner()) { closeScanner = request.getCloseScanner(); @@ -2218,11 +2257,29 @@ public class RSRpcServices implements HBaseRPCErrorHandler, scanner = region.getCoprocessorHost().preScannerOpen(scan); } if (scanner == null) { - scanner = region.getScanner(scan); + if (controller instanceof PayloadCarryingRpcController && isClientCellBlockSupport()) { + // always pass the controller to indicate this scan comes from a client + scanner = ((HRegion) region) + .getScanner(scan, true); + } else { + scanner = region.getScanner(scan); + } } + actualRegionScanner = scanner; + // TODO why post hook is called even when pre hook returned a RegionScanner. Fix if (region.getCoprocessorHost() != null) { scanner = region.getCoprocessorHost().postScannerOpen(scan, scanner); } + if (actualRegionScanner != scanner && !(scanner instanceof BatchReturnableScanner)) { + // It means the RegionScanner has been wrapped + if (actualRegionScanner instanceof RegionScannerImpl) { + // Copy the results when nextRaw is called from the CP so that + // CP can have a cloned version of the results without bothering + // about the eviction. Ugly, yes!!!. If the CP impl does not use + // the actual scanner we are gone ??? + ((RegionScannerImpl) actualRegionScanner).setCopyCellsFromSharedMem(true); + } + } scannerId = addScanner(scanner, region); scannerName = String.valueOf(scannerId); ttl = this.scannerLeaseTimeoutPeriod; @@ -2445,7 +2502,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler, moreResults = false; results = null; } else { - addResults(builder, results, controller, RegionReplicaUtil.isDefaultReplica(region.getRegionInfo())); + addResults(builder, results, controller, + RegionReplicaUtil.isDefaultReplica(region.getRegionInfo())); } } catch (IOException e) { // if we have an exception on scanner next and we are using the callSeq @@ -2456,6 +2514,11 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } throw e; } finally { + // clear the block references here. + // The nextRaw() call would handle the new block + if (scanner instanceof BatchReturnableScanner) { + ((BatchReturnableScanner) scanner).returnCurrentBatch(); + } // We're done. On way out re-add the above removed lease. // Adding resets expiration time on lease. 
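Taken together, the scan() changes above follow an acquire/encode/release pattern: nextRaw() pins the cache blocks backing the returned cells, the response cell block is built while they are still pinned, and returnCurrentBatch() in the finally block unpins them. A condensed, self-contained illustration of that shape follows; the helper and callback names are hypothetical, not RSRpcServices code:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.regionserver.BatchReturnableScanner;
import org.apache.hadoop.hbase.regionserver.RegionScanner;

/** Illustration only: the acquire/encode/release shape the new scan path follows. */
public final class ScanBlockLifecycleSketch {

  /** Hypothetical callback standing in for the response-encoding step. */
  public interface BatchConsumer {
    void consume(List<Cell> cells) throws IOException;
  }

  private ScanBlockLifecycleSketch() {
  }

  /**
   * Read one batch, let the caller consume it while the backing blocks are still
   * pinned, then release the pins -- mirroring the finally block added to scan().
   */
  public static boolean readOneBatch(RegionScanner scanner, BatchConsumer consumer)
      throws IOException {
    List<Cell> cells = new ArrayList<Cell>();
    try {
      boolean more = scanner.nextRaw(cells);  // pins the blocks backing these cells
      consumer.consume(cells);                // e.g. encode them into a cell block
      return more;
    } finally {
      if (scanner instanceof BatchReturnableScanner) {
        ((BatchReturnableScanner) scanner).returnCurrentBatch();  // unpin
      }
    }
  }
}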
if (scanners.containsKey(scannerName)) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java index 5167b4e..6914132 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java @@ -85,7 +85,7 @@ public class ReversedKeyValueHeap extends KeyValueHeap { } if (!scanner.seekToPreviousRow(seekKey)) { - scanner.close(); + this.scannersForDelayedClose.add(scanner); } else { heap.add(scanner); } @@ -114,7 +114,7 @@ public class ReversedKeyValueHeap extends KeyValueHeap { return current != null; } if (!scanner.backwardSeek(seekKey)) { - scanner.close(); + this.scannersForDelayedClose.add(scanner); } else { heap.add(scanner); } @@ -134,7 +134,7 @@ public class ReversedKeyValueHeap extends KeyValueHeap { if (this.current.seekToPreviousRow(kvReturn)) { this.heap.add(this.current); } else { - this.current.close(); + this.scannersForDelayedClose.add(this.current); } this.current = pollRealKV(); } else { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java index d7ef6e8..ab23b65 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java @@ -39,12 +39,13 @@ class ReversedRegionScannerImpl extends RegionScannerImpl { * @param scan * @param additionalScanners * @param region + * @param isCellBlockSupported * @throws IOException */ ReversedRegionScannerImpl(Scan scan, - List additionalScanners, HRegion region) + List additionalScanners, HRegion region, boolean isCellBlockSupported) throws IOException { - region.super(scan, additionalScanners, region); + region.super(scan, additionalScanners, region, isCellBlockSupported); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index eba3689..cb372e3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hbase.util.BloomFilterWriter; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.hbase.io.hfile.HFileBlock; import com.google.common.base.Function; import com.google.common.base.Preconditions; @@ -1049,6 +1050,7 @@ public class StoreFile { private long deleteFamilyCnt = -1; private boolean bulkLoadResult = false; private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null; + private CacheConfig cacheConf; public Reader(FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf) throws IOException { @@ -1058,6 +1060,7 @@ public class StoreFile { public Reader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size, CacheConfig cacheConf, Configuration conf) throws IOException { + this.cacheConf = cacheConf; reader = HFile.createReader(fs, path, in, size, cacheConf, conf); bloomFilterType = BloomType.NONE; } @@ -1284,7 +1287,7 @@ public class StoreFile { // Empty file if 
(reader.getTrailer().getEntryCount() == 0) return false; - + HFileBlock bloomBlock = null; try { boolean shouldCheckBloom; ByteBuffer bloom; @@ -1292,8 +1295,8 @@ public class StoreFile { bloom = null; shouldCheckBloom = true; } else { - bloom = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, - true); + bloomBlock = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, true); + bloom = bloomBlock.getBufferWithoutHeader(); shouldCheckBloom = bloom != null; } @@ -1345,8 +1348,9 @@ public class StoreFile { } catch (IllegalArgumentException e) { LOG.error("Bad bloom filter data -- proceeding without", e); setGeneralBloomFilterFaulty(); + } finally { + reader.returnBlock(bloomBlock); } - return true; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java index 42a378d..dff9471 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java @@ -27,8 +27,6 @@ import java.util.List; import java.util.SortedSet; import java.util.concurrent.atomic.AtomicLong; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; @@ -46,7 +44,6 @@ import org.apache.hadoop.hbase.regionserver.StoreFile.Reader; */ @InterfaceAudience.LimitedPrivate("Coprocessor") public class StoreFileScanner implements KeyValueScanner { - private static final Log LOG = LogFactory.getLog(HStore.class); // the reader it comes from: private final StoreFile.Reader reader; @@ -158,7 +155,7 @@ public class StoreFileScanner implements KeyValueScanner { try { try { if(!seekAtOrAfter(hfs, key)) { - close(); + cur = null; return false; } @@ -185,7 +182,7 @@ public class StoreFileScanner implements KeyValueScanner { try { try { if (!reseekAtOrAfter(hfs, key)) { - close(); + cur = null; return false; } setCurrentCell(hfs.getKeyValue()); @@ -229,7 +226,6 @@ public class StoreFileScanner implements KeyValueScanner { } if (cur == null) { - close(); return false; } @@ -237,8 +233,8 @@ public class StoreFileScanner implements KeyValueScanner { } public void close() { - // Nothing to close on HFileScanner? 
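The bloom-filter change just above (and the CompoundBloomFilter change later in this patch) switch to a get/use/return pattern for meta blocks: the HFileBlock is fetched, its buffer is used, and the block is handed back to the reader so a cache-resident copy can have its reference count dropped. A minimal sketch of that idiom, using only calls visible in this patch; the helper itself is hypothetical:

import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;

/** Illustration of the get/use/return idiom adopted for bloom and meta blocks. */
public final class ReturnBlockIdiomSketch {

  private ReturnBlockIdiomSketch() {
  }

  /** Fetch a meta block, read its buffer, and always return the block to the reader. */
  public static int metaBlockSize(HFile.Reader reader, String metaBlockName)
      throws IOException {
    HFileBlock block = null;
    try {
      block = reader.getMetaBlock(metaBlockName, true);  // may be served from the block cache
      if (block == null) {
        return 0;
      }
      ByteBuffer buf = block.getBufferWithoutHeader();
      return buf.remaining();
    } finally {
      if (block != null) {
        reader.returnBlock(block);  // drop the cache reference so the block can be evicted
      }
    }
  }
}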
cur = null; + this.hfs.close(); } /** @@ -429,7 +425,7 @@ public class StoreFileScanner implements KeyValueScanner { key.getRowLength()); if (seekCount != null) seekCount.incrementAndGet(); if (!hfs.seekBefore(seekKey)) { - close(); + this.cur = null; return false; } KeyValue firstKeyOfPreviousRow = KeyValueUtil.createFirstOnRow(hfs.getKeyValue() @@ -437,7 +433,7 @@ public class StoreFileScanner implements KeyValueScanner { if (seekCount != null) seekCount.incrementAndGet(); if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) { - close(); + this.cur = null; return false; } @@ -494,4 +490,9 @@ public class StoreFileScanner implements KeyValueScanner { public Cell getNextIndexedKey() { return hfs.getNextIndexedKey(); } + + @Override + public void returnCurrentBatch() { + this.hfs.returnCurrentBatch(); + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index cbca57b..4c4d8f0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -22,8 +22,11 @@ package org.apache.hadoop.hbase.regionserver; import java.io.IOException; import java.io.InterruptedIOException; import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.NavigableSet; +import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.locks.ReentrantLock; @@ -53,8 +56,8 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; * into List for a single row. */ @InterfaceAudience.Private -public class StoreScanner extends NonReversedNonLazyKeyValueScanner - implements KeyValueScanner, InternalScanner, ChangedReadersObserver { +public class StoreScanner extends NonReversedNonLazyKeyValueScanner implements KeyValueScanner, + InternalScanner, ChangedReadersObserver { private static final Log LOG = LogFactory.getLog(StoreScanner.class); protected Store store; protected ScanQueryMatcher matcher; @@ -84,6 +87,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner protected final long maxRowSize; protected final long cellsPerHeartbeatCheck; + // Collects all the KVHeap that are eagerly getting closed during the + // course of a scan + protected Set heapsForDelayedClose = new HashSet(); + /** * The number of KVs seen by the scanner. Includes explicitly skipped KVs, but not * KVs skipped via seeking to next row/column. TODO: estimate them? @@ -371,9 +378,8 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner } } - protected void resetKVHeap(List scanners, - CellComparator comparator) throws IOException { - // Combine all seeked scanners with a heap + protected void resetKVHeap(List scanners, CellComparator comparator) + throws IOException { heap = new KeyValueHeap(scanners, comparator); } @@ -437,17 +443,38 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner @Override public void close() { + close(true); + } + + protected void close(boolean heapClose) { lock.lock(); try { - if (this.closing) return; - this.closing = true; - // under test, we dont have a this.store - if (this.store != null) - this.store.deleteChangedReaderObserver(this); - if (this.heap != null) - this.heap.close(); - this.heap = null; // CLOSED! - this.lastTop = null; // If both are null, we are closed. 
+ if (this.closing) { + if(heapClose) { + returnCurrentBatch(); + } + return; + } + this.closing = true; + // under test, we dont have a this.store + if (this.store != null) + this.store.deleteChangedReaderObserver(this); + if (heapClose) { + for (KeyValueHeap h : this.heapsForDelayedClose) { + h.close(); + } + this.heapsForDelayedClose.clear(); + if (this.heap != null) { + this.heap.close(); + this.heap = null; // CLOSED! + } + } else { + if (this.heap != null) { + this.heapsForDelayedClose.add(this.heap); + this.heap = null; + } + } + this.lastTop = null; // If both are null, we are closed. } finally { lock.unlock(); } @@ -491,13 +518,16 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner // if the heap was left null, then the scanners had previously run out anyways, close and // return. if (this.heap == null) { - close(); + // By this time a partial close should have happened because the heap is already null + close(false); return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } Cell peeked = this.heap.peek(); if (peeked == null) { - close(); + // Here we know for sure that the heap will not give us any more results. We can mark + // the heap for closing, but we cannot return its blocks yet + close(false); return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } @@ -537,7 +567,6 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner if (prevCell != cell) ++kvsScanned; // Do object compare - we set prevKV from the same heap. checkScanOrder(prevCell, cell, comparator); prevCell = cell; - ScanQueryMatcher.MatchCode qcode = matcher.match(cell); qcode = optimize(qcode, cell); switch(qcode) { @@ -604,7 +633,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues(); case DONE_SCAN: - close(); + close(false); return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); case SEEK_NEXT_ROW: @@ -645,7 +674,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner } // No more keys - close(); + close(false); return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } finally { lock.unlock(); } @@ -704,8 +733,8 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner //DebugPrint.println("SS updateReaders, topKey = " + lastTop); - // close scanners to old obsolete Store files - this.heap.close(); // bubble thru and close all scanners.
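The heapsForDelayedClose set above, like the scannersForDelayedClose set added to KeyValueHeap earlier, implements a delayed-close idiom: an exhausted sub-scanner is parked rather than closed, because closing it straight away would release cache blocks that cells already handed to the caller may still reference. A minimal generic sketch of the idiom, with hypothetical names:

import java.util.HashSet;
import java.util.Set;

/** Generic sketch of the delayed-close idiom used by KeyValueHeap and StoreScanner. */
public class DelayedCloseSketch<S extends AutoCloseable> implements AutoCloseable {

  // Sub-scanners that are exhausted but whose blocks may still back returned cells.
  private final Set<S> parkedForDelayedClose = new HashSet<S>();

  /** Park an exhausted sub-scanner instead of closing it eagerly. */
  public void parkExhausted(S scanner) {
    parkedForDelayedClose.add(scanner);
  }

  /** The real close: only now are the parked sub-scanners (and their blocks) released. */
  @Override
  public void close() throws Exception {
    for (S scanner : parkedForDelayedClose) {
      scanner.close();
    }
    parkedForDelayedClose.clear();
  }
}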
+ // add all the scanners to be closed to this list + this.heapsForDelayedClose.add(this.heap); this.heap = null; // the re-seeks could be slow (access HDFS) free up memory ASAP // Let the next() call handle re-creating and seeking @@ -884,5 +913,21 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner public Cell getNextIndexedKey() { return this.heap.getNextIndexedKey(); } + + @Override + public void returnCurrentBatch() { + if (heap != null) { + this.heap.returnCurrentBatch(); + } + if (heapsForDelayedClose != null) { + Iterator iterator = heapsForDelayedClose.iterator(); + while (iterator.hasNext()) { + KeyValueHeap scanner = iterator.next(); + if (scanner != null && scanner instanceof BatchReturnableScanner) { + ((BatchReturnableScanner) scanner).returnCurrentBatch(); + } + } + } + } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java index 984742f..8b05d8f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java @@ -120,6 +120,8 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase result = BloomFilterUtil.contains(key, keyOffset, keyLength, bloomBuf, bloomBlock.headerSize(), bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount); + // After the use return back the block if it was served from a cache. + reader.returnBlock(bloomBlock); } if (numQueriesPerChunk != null && block >= 0) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index f376e3d..0145cec 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -1646,6 +1646,24 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { return (HTable) getConnection().getTable(tableName); } + public HTable createTable(TableName tableName, byte[][] families, + int numVersions, int blockSize, String cpName) throws IOException { + HTableDescriptor desc = new HTableDescriptor(tableName); + for (byte[] family : families) { + HColumnDescriptor hcd = new HColumnDescriptor(family) + .setMaxVersions(numVersions) + .setBlocksize(blockSize); + desc.addFamily(hcd); + } + if(cpName != null) { + desc.addCoprocessor(cpName); + } + getHBaseAdmin().createTable(desc); + // HBaseAdmin only waits for regions to appear in hbase:meta we should wait until they are assigned + waitUntilAllRegionsAssigned(tableName); + return (HTable) getConnection().getTable(tableName); + } + /** * Create a table. 
* @param tableName diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java index 5c0284f..4f549e4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java @@ -90,6 +90,10 @@ public class TestAcidGuarantees implements Tool { // prevent aggressive region split conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY, ConstantSizeRegionSplitPolicy.class.getName()); + conf.setInt("hbase.bucketcache.size", 400); + conf.setStrings("hbase.bucketcache.ioengine", "heap"); + conf.setFloat("hfile.block.cache.size", 0.2f); + conf.setFloat("hbase.regionserver.global.memstore.size", 0.1f); util = new HBaseTestingUtility(conf); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java new file mode 100644 index 0000000..4cfab35 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java @@ -0,0 +1,1320 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Iterator; +import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; +import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; +import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint; +import org.apache.hadoop.hbase.coprocessor.ObserverContext; +import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; +import org.apache.hadoop.hbase.io.hfile.BlockCache; +import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; +import org.apache.hadoop.hbase.io.hfile.CacheConfig; +import org.apache.hadoop.hbase.io.hfile.CachedBlock; +import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; +import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; +import org.apache.hadoop.hbase.regionserver.InternalScanner; +import org.apache.hadoop.hbase.regionserver.Region; +import org.apache.hadoop.hbase.regionserver.RegionScanner; +import org.apache.hadoop.hbase.regionserver.ScannerContext; +import org.apache.hadoop.hbase.regionserver.Store; +import org.apache.hadoop.hbase.testclassification.ClientTests; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ LargeTests.class, ClientTests.class }) +@SuppressWarnings("deprecation") +public class TestBlockEvictionFromClient { + private static final Log LOG = LogFactory.getLog(TestBlockEvictionFromClient.class); + protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + static byte[][] ROWS = new byte[2][]; + private static int NO_OF_THREADS = 3; + private static byte[] ROW = Bytes.toBytes("testRow"); + private static byte[] ROW1 = Bytes.toBytes("testRow1"); + private static byte[] FAMILY = Bytes.toBytes("testFamily"); + private static byte[][] FAMILIES_1 = new byte[1][0]; + private static byte[] QUALIFIER = Bytes.toBytes("testQualifier"); + private static byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + private static byte[] data = new byte[1000]; + private static byte[] data2 = Bytes.add(data, data); + protected static int SLAVES = 1; + private static CountDownLatch latch; + private static CountDownLatch getLatch; + private static CountDownLatch compactionLatch; + private static CountDownLatch exceptionLatch; + + /** + * @throws java.lang.Exception + */ + @BeforeClass + public static void setUpBeforeClass() throws Exception { + ROWS[0] = ROW; + ROWS[1] = ROW1; + Configuration conf = TEST_UTIL.getConfiguration(); + 
conf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY, + MultiRowMutationEndpoint.class.getName()); + conf.setBoolean("hbase.table.sanity.checks", true); // enable for below + // tests + conf.setInt("hbase.regionserver.handler.count", 20); + conf.setInt("hbase.bucketcache.size", 400); + conf.setStrings("hbase.bucketcache.ioengine", "heap"); + conf.setFloat("hfile.block.cache.size", 0.2f); + conf.setFloat("hbase.regionserver.global.memstore.size", 0.1f); + conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); + conf.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 5000); + FAMILIES_1[0] = FAMILY; + TEST_UTIL.startMiniCluster(SLAVES); + } + + /** + * @throws java.lang.Exception + */ + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * @throws java.lang.Exception + */ + @Before + public void setUp() throws Exception { + CustomInnerRegionObserver.waitForGets.set(false); + CustomInnerRegionObserver.countOfNext.set(0); + CustomInnerRegionObserver.countOfGets.set(0); + } + + /** + * @throws java.lang.Exception + */ + @After + public void tearDown() throws Exception { + if (latch != null) { + while (latch.getCount() > 0) { + latch.countDown(); + } + } + if (getLatch != null) { + getLatch.countDown(); + } + if (compactionLatch != null) { + compactionLatch.countDown(); + } + if (exceptionLatch != null) { + exceptionLatch.countDown(); + } + latch = null; + getLatch = null; + compactionLatch = null; + exceptionLatch = null; + CustomInnerRegionObserver.throwException.set(false); + // Clean up the tables for every test case + TableName[] listTableNames = TEST_UTIL.getHBaseAdmin().listTableNames(); + for (TableName tableName : listTableNames) { + if (!tableName.isSystemTable()) { + TEST_UTIL.getHBaseAdmin().disableTable(tableName); + TEST_UTIL.getHBaseAdmin().deleteTable(tableName); + } + } + } + + @Test + public void testBlockEvictionWithParallelScans() throws Exception { + HTable table = null; + try { + latch = new CountDownLatch(1); + TableName tableName = TableName.valueOf("testBlockEvictionWithParallelScans"); + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserver.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + Store store = region.getStores().iterator().next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + BlockCache cache = cacheConf.getBlockCache(); + + // insert data. 
2 Rows are added + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data)); + // data was in memstore so don't expect any changes + // flush the data + System.out.println("Flushing cache in problematic area"); + // Should create one Hfile with 2 blocks + region.flush(true); + // Load cache + // Create three sets of scan + ScanThread[] scanThreads = initiateScan(table, false); + Thread.sleep(100); + checkForBlockEviction(cache, false, false, false); + for (ScanThread thread : scanThreads) { + thread.join(); + } + // CustomInnerRegionObserver.sleepTime.set(0); + Iterator iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + // read the data and expect same blocks, one new hit, no misses + assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data)); + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + // Check how this miss is happening + // insert a second column, read the row, no new blocks, 3 new hits + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + byte[] data2 = Bytes.add(data, data); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + Result r = table.get(new Get(ROW)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2)); + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + // flush, one new block + System.out.println("Flushing cache"); + region.flush(true); + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + // compact, net minus two blocks, two hits, no misses + System.out.println("Compacting"); + assertEquals(2, store.getStorefilesCount()); + store.triggerMajorCompaction(); + region.compact(true); + waitForStoreFileCount(store, 1, 10000); // wait 10 seconds max + assertEquals(1, store.getStorefilesCount()); + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + // read the row, this should be a cache miss because we don't cache data + // blocks on compaction + r = table.get(new Get(ROW)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2)); + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + public void testParallelGetsAndScans() throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(2); + // Check if get() returns blocks on its close() itself + getLatch = new CountDownLatch(1); + TableName tableName = TableName.valueOf("testParallelGetsAndScans"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserver.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + Store store = region.getStores().iterator().next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + BlockCache cache = cacheConf.getBlockCache(); + + insertData(table); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + 
region.flush(true); + // Create three sets of scan + CustomInnerRegionObserver.waitForGets.set(true); + ScanThread[] scanThreads = initiateScan(table, false); + // Create three sets of gets + GetThread[] getThreads = initiateGet(table, false, false); + checkForBlockEviction(cache, false, false, false); + CustomInnerRegionObserver.waitForGets.set(false); + checkForBlockEviction(cache, false, false, false); + for (GetThread thread : getThreads) { + thread.join(); + } + // Verify whether the gets have returned the blocks that it had + CustomInnerRegionObserver.waitForGets.set(true); + // giving some time for the block to be decremented + checkForBlockEviction(cache, true, false, false); + getLatch.countDown(); + for (ScanThread thread : scanThreads) { + thread.join(); + } + System.out.println("Scans should have returned the bloks"); + // Check with either true or false + CustomInnerRegionObserver.waitForGets.set(false); + // The scan should also have released the blocks by now + checkForBlockEviction(cache, true, true, false); + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + public void testGetWithCellsInDifferentFiles() throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(1); + // Check if get() returns blocks on its close() itself + getLatch = new CountDownLatch(1); + TableName tableName = TableName.valueOf("testGetWithCellsInDifferentFiles"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserver.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + Store store = region.getStores().iterator().next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + BlockCache cache = cacheConf.getBlockCache(); + + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + region.flush(true); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + CustomInnerRegionObserver.waitForGets.set(true); + // Create three sets of gets + GetThread[] getThreads = initiateGet(table, false, false); + Thread.sleep(200); + CustomInnerRegionObserver.getCdl().get().countDown(); + for (GetThread thread : getThreads) { + thread.join(); + } + // Verify whether the gets have returned the blocks that it had + CustomInnerRegionObserver.waitForGets.set(true); + // giving some time for the block to be decremented + checkForBlockEviction(cache, true, false, false); + getLatch.countDown(); + System.out.println("Gets should have returned the bloks"); + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + // TODO : check how block index works here + public void testGetsWithMultiColumnsAndExplicitTracker() throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(1); + // Check if get() returns blocks on its close() itself + getLatch = new CountDownLatch(1); + TableName tableName = 
TableName.valueOf("testGetsWithMultiColumnsAndExplicitTracker"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserver.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + BlockCache cache = setCacheProperties(region); + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + for (int i = 1; i < 10; i++) { + put = new Put(ROW); + put.add(FAMILY, Bytes.toBytes("testQualifier" + i), data2); + table.put(put); + if (i % 2 == 0) { + region.flush(true); + } + } + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + region.flush(true); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + CustomInnerRegionObserver.waitForGets.set(true); + // Create three sets of gets + GetThread[] getThreads = initiateGet(table, true, false); + Thread.sleep(200); + Iterator iterator = cache.iterator(); + boolean usedBlocksFound = false; + int refCount = 0; + int noOfBlocksWithRef = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 + System.out.println("The refCount is " + refCount); + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + noOfBlocksWithRef++; + } + } + assertTrue(usedBlocksFound); + // the number of blocks referred + assertEquals(10, noOfBlocksWithRef); + CustomInnerRegionObserver.getCdl().get().countDown(); + for (GetThread thread : getThreads) { + thread.join(); + } + // Verify whether the gets have returned the blocks that it had + CustomInnerRegionObserver.waitForGets.set(true); + // giving some time for the block to be decremented + checkForBlockEviction(cache, true, false, false); + getLatch.countDown(); + System.out.println("Gets should have returned the bloks"); + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + public void testGetWithMultipleColumnFamilies() throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(1); + // Check if get() returns blocks on its close() itself + getLatch = new CountDownLatch(1); + TableName tableName = TableName.valueOf("testGetWithMultipleColumnFamilies"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + byte[][] fams = new byte[10][]; + fams[0] = FAMILY; + for (int i = 1; i < 10; i++) { + fams[i] = (Bytes.toBytes("testFamily" + i)); + } + table = TEST_UTIL.createTable(tableName, fams, 1, 1024, + CustomInnerRegionObserver.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + 
BlockCache cache = setCacheProperties(region); + + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + for (int i = 1; i < 10; i++) { + put = new Put(ROW); + put.add(Bytes.toBytes("testFamily" + i), Bytes.toBytes("testQualifier" + i), data2); + table.put(put); + if (i % 2 == 0) { + region.flush(true); + } + } + region.flush(true); + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + region.flush(true); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + CustomInnerRegionObserver.waitForGets.set(true); + // Create three sets of gets + GetThread[] getThreads = initiateGet(table, true, true); + Thread.sleep(200); + Iterator iterator = cache.iterator(); + boolean usedBlocksFound = false; + int refCount = 0; + int noOfBlocksWithRef = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 + System.out.println("The refCount is " + refCount); + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + noOfBlocksWithRef++; + } + } + assertTrue(usedBlocksFound); + // the number of blocks referred + assertEquals(3, noOfBlocksWithRef); + CustomInnerRegionObserver.getCdl().get().countDown(); + for (GetThread thread : getThreads) { + thread.join(); + } + // Verify whether the gets have returned the blocks that it had + CustomInnerRegionObserver.waitForGets.set(true); + // giving some time for the block to be decremented + checkForBlockEviction(cache, true, false, false); + getLatch.countDown(); + System.out.println("Gets should have returned the bloks"); + } finally { + if (table != null) { + table.close(); + } + } + } + + //@Test + public void testScanWithMultipleColumnFamilies() throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(1); + // Check if get() returns blocks on its close() itself + TableName tableName = TableName.valueOf("testScanWithMultipleColumnFamilies"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + byte[][] fams = new byte[10][]; + fams[0] = FAMILY; + for (int i = 1; i < 10; i++) { + fams[i] = (Bytes.toBytes("testFamily" + i)); + } + table = TEST_UTIL.createTable(tableName, fams, 1, 1024, + CustomInnerRegionObserver.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + BlockCache cache = setCacheProperties(region); + + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + region.flush(true); + for (int i = 1; i < 10; i++) { + put = new Put(ROW); + put.add(Bytes.toBytes("testFamily" + i), Bytes.toBytes("testQualifier" + i), data2); + table.put(put); + if (i % 2 == 0) { + region.flush(true); + } + } + 
region.flush(true); + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + region.flush(true); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + // Create three sets of gets + ScanThread[] scanThreads = initiateScan(table, true); + Thread.sleep(200); + Iterator iterator = cache.iterator(); + boolean usedBlocksFound = false; + int refCount = 0; + int noOfBlocksWithRef = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 + System.out.println("The refCount is " + refCount); + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + noOfBlocksWithRef++; + } + } + assertTrue(usedBlocksFound); + // the number of blocks referred + assertEquals(12, noOfBlocksWithRef); + CustomInnerRegionObserver.getCdl().get().countDown(); + for (ScanThread thread : scanThreads) { + thread.join(); + } + // giving some time for the block to be decremented + checkForBlockEviction(cache, true, false, false); + } finally { + if (table != null) { + table.close(); + } + } + } + + private BlockCache setCacheProperties(Region region) { + Iterator strItr = region.getStores().iterator(); + BlockCache cache = null; + while (strItr.hasNext()) { + Store store = strItr.next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + // Use the last one + cache = cacheConf.getBlockCache(); + } + return cache; + } + + @Test + public void testParallelGetsAndScanWithWrappedRegionScanner() throws IOException, + InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(2); + // Check if get() returns blocks on its close() itself + getLatch = new CountDownLatch(1); + TableName tableName = TableName.valueOf("testParallelGetsAndScanWithWrappedRegionScanner"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserverWrapper.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + Store store = region.getStores().iterator().next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + BlockCache cache = cacheConf.getBlockCache(); + + // insert data. 2 Rows are added + insertData(table); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + region.flush(true); + // CustomInnerRegionObserver.sleepTime.set(5000); + // Create three sets of scan + CustomInnerRegionObserver.waitForGets.set(true); + ScanThread[] scanThreads = initiateScan(table, false); + // Create three sets of gets + GetThread[] getThreads = initiateGet(table, false, false); + // The block would have been decremented for the scan case as it was + // wrapped + // before even the postNext hook gets executed. 
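The block reference-count check is repeated verbatim across several of these tests; it could be factored into a helper such as the sketch below, which uses only calls already present in the test (BlockCache iteration plus the getRefCount accessors this patch adds to BucketCache and CombinedBlockCache). The helper itself is hypothetical:

  // Hypothetical helper for this test class; mirrors the inline refCount loops above.
  private static int countBlocksWithRef(BlockCache cache, int expectedRefCount) {
    int blocksWithRef = 0;
    Iterator<CachedBlock> iterator = cache.iterator();
    while (iterator.hasNext()) {
      CachedBlock next = iterator.next();
      BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
      int refCount;
      if (cache instanceof BucketCache) {
        refCount = ((BucketCache) cache).getRefCount(cacheKey);
      } else if (cache instanceof CombinedBlockCache) {
        refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
      } else {
        continue;
      }
      if (refCount != 0) {
        // Every reader thread should hold exactly one reference to the block.
        assertEquals(expectedRefCount, refCount);
        blocksWithRef++;
      }
    }
    return blocksWithRef;
  }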
+ // giving some time for the block to be decremented + Thread.sleep(100); + CustomInnerRegionObserver.waitForGets.set(false); + checkForBlockEviction(cache, false, false, true); + // countdown the latch + CustomInnerRegionObserver.getCdl().get().countDown(); + for (GetThread thread : getThreads) { + thread.join(); + } + getLatch.countDown(); + for (ScanThread thread : scanThreads) { + thread.join(); + } + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + public void testScanWithCompaction() throws IOException, InterruptedException { + testScanWithCompactionInternals("testScanWithCompaction", false); + } + + @Test + public void testReverseScanWithCompaction() throws IOException, InterruptedException { + testScanWithCompactionInternals("testReverseScanWithCompaction", true); + } + + private void testScanWithCompactionInternals(String tableNameStr, boolean reversed) + throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(1); + compactionLatch = new CountDownLatch(1); + TableName tableName = TableName.valueOf(tableNameStr); + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserverWrapper.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + Store store = region.getStores().iterator().next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + BlockCache cache = cacheConf.getBlockCache(); + + // insert data. 2 Rows are added + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data)); + // Should create one Hfile with 2 blocks + region.flush(true); + // read the data and expect same blocks, one new hit, no misses + int refCount = 0; + // Check how this miss is happening + // insert a second column, read the row, no new blocks, 3 new hits + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + byte[] data2 = Bytes.add(data, data); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + // flush, one new block + System.out.println("Flushing cache"); + region.flush(true); + Iterator iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + // Create three sets of scan + ScanThread[] scanThreads = initiateScan(table, reversed); + Thread.sleep(100); + iterator = cache.iterator(); + boolean usedBlocksFound = false; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + } + } + assertTrue("Blocks with non zero ref count should be found ", usedBlocksFound); + usedBlocksFound = false; + System.out.println("Compacting"); + assertEquals(2, store.getStorefilesCount()); + store.triggerMajorCompaction(); + region.compact(true); + 
waitForStoreFileCount(store, 1, 10000); // wait 10 seconds max + assertEquals(1, store.getStorefilesCount()); + // Even after compaction is done we will have some blocks that cannot + // be evicted this is because the scan is still referencing them + iterator = cache.iterator(); + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 as they are not yet cleared + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + } + } + assertTrue("Blocks with non zero ref count should be found ", usedBlocksFound); + // Should not throw exception + compactionLatch.countDown(); + latch.countDown(); + for (ScanThread thread : scanThreads) { + thread.join(); + } + // by this time all blocks should have been evicted + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + Result r = table.get(new Get(ROW)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2)); + // The gets would be working on new blocks + iterator = cache.iterator(); + iterateBlockCache(cache, iterator); + } finally { + if (table != null) { + table.close(); + } + } + } + + @Test + public void testScanWithException() throws IOException, InterruptedException { + HTable table = null; + try { + latch = new CountDownLatch(1); + exceptionLatch = new CountDownLatch(1); + TableName tableName = TableName.valueOf("testScanWithException"); + // Create KV that will give you two blocks + // Create a table with block size as 1024 + table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024, + CustomInnerRegionObserverWrapper.class.getName()); + // get the block cache and region + String regionName = table.getRegionLocations().firstKey().getEncodedName(); + Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions( + regionName); + Store store = region.getStores().iterator().next(); + CacheConfig cacheConf = store.getCacheConfig(); + cacheConf.setCacheDataOnWrite(true); + cacheConf.setEvictOnClose(true); + BlockCache cache = cacheConf.getBlockCache(); + // insert data. 2 Rows are added + insertData(table); + // flush the data + System.out.println("Flushing cache"); + // Should create one Hfile with 2 blocks + region.flush(true); + // CustomInnerRegionObserver.sleepTime.set(5000); + CustomInnerRegionObserver.throwException.set(true); + ScanThread[] scanThreads = initiateScan(table, false); + // The block would have been decremented for the scan case as it was + // wrapped + // before even the postNext hook gets executed. 
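Reviewer note: the throwException flag set just above only takes effect inside the CustomScanner wrapper defined near the bottom of this class. For readability while following this test, its nextRaw(List, ScannerContext) boils down to the fragment below (generics restored, the two latch waits condensed into one try block):

    @Override
    public boolean nextRaw(List<Cell> result, ScannerContext context) throws IOException {
      boolean nextRaw = delegate.nextRaw(result, context);
      try {
        if (compactionLatch != null && compactionLatch.getCount() > 0) {
          compactionLatch.await();   // hold the scan while the compaction test inspects the cache
        }
        if (CustomInnerRegionObserver.throwException.get() && exceptionLatch.getCount() > 0) {
          exceptionLatch.await();    // let the test read the pinned ref counts first
          throw new IOException("throw exception");
        }
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt(); // the test ignores interrupts; just preserve the flag
      }
      return nextRaw;
    }

The point the test relies on is that the IOException surfaces only after the main thread counts down exceptionLatch, so non-zero ref counts can be asserted while the scan is parked, again right after the failure (the references are only dropped by scanner-lease cleanup), and finally zero once the lease has expired.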
+ // giving some time for the block to be decremented + Thread.sleep(100); + Iterator iterator = cache.iterator(); + boolean usedBlocksFound = false; + int refCount = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + } + } + assertTrue(usedBlocksFound); + exceptionLatch.countDown(); + // countdown the latch + CustomInnerRegionObserver.getCdl().get().countDown(); + for (ScanThread thread : scanThreads) { + thread.join(); + } + iterator = cache.iterator(); + usedBlocksFound = false; + refCount = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + if (refCount != 0) { + // Blocks will be with count 3 + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + } + } + assertTrue(usedBlocksFound); + // Sleep till the scan lease would expire? Can we reduce this value? + Thread.sleep(5100); + iterator = cache.iterator(); + usedBlocksFound = false; + refCount = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + System.out.println("The ref count is "+refCount); + if (refCount != 0) { + // Blocks will be with count 3 + assertEquals(NO_OF_THREADS, refCount); + usedBlocksFound = true; + } + } + assertFalse(usedBlocksFound); + } finally { + if (table != null) { + table.close(); + } + } + } + + private void iterateBlockCache(BlockCache cache, Iterator iterator) { + int refCount; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + assertEquals(0, refCount); + } + } + + private void insertData(HTable table) throws IOException { + Put put = new Put(ROW); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + put = new Put(ROW1); + put.add(FAMILY, QUALIFIER, data); + table.put(put); + byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER); + put = new Put(ROW); + put.add(FAMILY, QUALIFIER2, data2); + table.put(put); + } + + private ScanThread[] initiateScan(HTable table, boolean reverse) throws IOException, + InterruptedException { + ScanThread[] scanThreads = new ScanThread[NO_OF_THREADS]; + for (int i = 0; i < NO_OF_THREADS; i++) { + scanThreads[i] = new ScanThread(table, reverse); + } + for (ScanThread thread : 
scanThreads) { + thread.start(); + } + return scanThreads; + } + + private GetThread[] initiateGet(HTable table, boolean tracker, boolean multipleCFs) + throws IOException, InterruptedException { + GetThread[] getThreads = new GetThread[NO_OF_THREADS]; + for (int i = 0; i < NO_OF_THREADS; i++) { + getThreads[i] = new GetThread(table, tracker, multipleCFs); + } + for (GetThread thread : getThreads) { + thread.start(); + } + return getThreads; + } + + private void checkForBlockEviction(BlockCache cache, boolean getClosed, boolean expectOnlyZero, + boolean wrappedCp) throws InterruptedException { + int counter = NO_OF_THREADS; + if (CustomInnerRegionObserver.waitForGets.get()) { + // Because only one row is selected, it has only 2 blocks + counter = counter - 1; + while (CustomInnerRegionObserver.countOfGets.get() < NO_OF_THREADS) { + Thread.sleep(100); + } + } else { + while (CustomInnerRegionObserver.countOfNext.get() < NO_OF_THREADS) { + Thread.sleep(100); + } + } + Iterator iterator = cache.iterator(); + int refCount = 0; + while (iterator.hasNext()) { + CachedBlock next = iterator.next(); + BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset()); + if (cache instanceof BucketCache) { + refCount = ((BucketCache) cache).getRefCount(cacheKey); + } else if (cache instanceof CombinedBlockCache) { + refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey); + } else { + continue; + } + System.out.println(" the refcount is " + refCount + " block is " + cacheKey); + if (CustomInnerRegionObserver.waitForGets.get()) { + if (expectOnlyZero) { + assertTrue(refCount == 0); + } + if (refCount != 0) { + // Because the scan would have also touched up on these blocks but + // it + // would have touched + // all 3 + if (getClosed) { + // If get has closed only the scan's blocks would be available + assertEquals(refCount, CustomInnerRegionObserver.countOfGets.get()); + } else { + assertEquals(refCount, CustomInnerRegionObserver.countOfGets.get() + (NO_OF_THREADS)); + } + } + } else { + // Because the get would have also touched up on these blocks but it + // would have touched + // upon only 2 additionally + if (expectOnlyZero) { + assertTrue(refCount == 0); + } + if (refCount != 0) { + if (getLatch == null || wrappedCp) { + assertEquals(refCount, CustomInnerRegionObserver.countOfNext.get()); + } else { + assertEquals(refCount, CustomInnerRegionObserver.countOfNext.get() + (NO_OF_THREADS)); + } + } + } + } + CustomInnerRegionObserver.getCdl().get().countDown(); + } + + private static class GetThread extends Thread { + private final HTable table; + private final boolean tracker; + private final boolean multipleCFs; + + public GetThread(HTable table, boolean tracker, boolean multipleCFs) { + this.table = table; + this.tracker = tracker; + this.multipleCFs = multipleCFs; + } + + @Override + public void run() { + try { + initiateGet(table); + } catch (IOException e) { + // do nothing + } + } + + private void initiateGet(HTable table) throws IOException { + Get get = new Get(ROW); + if (tracker) { + // Change this + if (!multipleCFs) { + get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 3)); + get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 8)); + get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 9)); + // Unknown key + get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 900)); + } else { + get.addColumn(Bytes.toBytes("testFamily" + 3), Bytes.toBytes("testQualifier" + 3)); + get.addColumn(Bytes.toBytes("testFamily" + 8), Bytes.toBytes("testQualifier" + 8)); + 
get.addColumn(Bytes.toBytes("testFamily" + 9), Bytes.toBytes("testQualifier" + 9)); + // Unknown key + get.addColumn(Bytes.toBytes("testFamily" + 9), Bytes.toBytes("testQualifier" + 900)); + } + } + CustomInnerRegionObserver.getCdl().set(latch); + Result r = table.get(get); + System.out.println(r); + if (!tracker) { + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data)); + assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2)); + } else { + if (!multipleCFs) { + assertTrue(Bytes.equals(r.getValue(FAMILY, Bytes.toBytes("testQualifier" + 3)), data2)); + assertTrue(Bytes.equals(r.getValue(FAMILY, Bytes.toBytes("testQualifier" + 8)), data2)); + assertTrue(Bytes.equals(r.getValue(FAMILY, Bytes.toBytes("testQualifier" + 9)), data2)); + } else { + assertTrue(Bytes.equals( + r.getValue(Bytes.toBytes("testFamily" + 3), Bytes.toBytes("testQualifier" + 3)), + data2)); + assertTrue(Bytes.equals( + r.getValue(Bytes.toBytes("testFamily" + 8), Bytes.toBytes("testQualifier" + 8)), + data2)); + assertTrue(Bytes.equals( + r.getValue(Bytes.toBytes("testFamily" + 9), Bytes.toBytes("testQualifier" + 9)), + data2)); + } + } + } + } + + private static class ScanThread extends Thread { + private final HTable table; + private final boolean reverse; + + public ScanThread(HTable table, boolean reverse) { + this.table = table; + this.reverse = reverse; + } + + @Override + public void run() { + try { + initiateScan(table); + } catch (IOException e) { + // do nothing + } + } + + private void initiateScan(HTable table) throws IOException { + Scan scan = new Scan(); + if (reverse) { + scan.setReversed(true); + } + CustomInnerRegionObserver.getCdl().set(latch); + ResultScanner resScanner = table.getScanner(scan); + int i = (reverse ? ROWS.length - 1 : 0); + boolean resultFound = false; + for (Result result : resScanner) { + resultFound = true; + System.out.println(result); + if (!reverse) { + assertTrue(Bytes.equals(result.getRow(), ROWS[i])); + i++; + } else { + assertTrue(Bytes.equals(result.getRow(), ROWS[i])); + i--; + } + } + assertTrue(resultFound); + } + } + + private void waitForStoreFileCount(Store store, int count, int timeout) + throws InterruptedException { + long start = System.currentTimeMillis(); + while (start + timeout > System.currentTimeMillis() && store.getStorefilesCount() != count) { + Thread.sleep(100); + } + System.out.println("start=" + start + ", now=" + System.currentTimeMillis() + ", cur=" + + store.getStorefilesCount()); + assertEquals(count, store.getStorefilesCount()); + } + + private static class CustomScanner implements RegionScanner { + + private RegionScanner delegate; + + public CustomScanner(RegionScanner delegate) { + this.delegate = delegate; + } + + @Override + public boolean next(List results) throws IOException { + return delegate.next(results); + } + + @Override + public boolean next(List result, ScannerContext scannerContext) throws IOException { + return delegate.next(result, scannerContext); + } + + @Override + public boolean nextRaw(List result) throws IOException { + return delegate.nextRaw(result); + } + + @Override + public boolean nextRaw(List result, ScannerContext context) throws IOException { + boolean nextRaw = delegate.nextRaw(result, context); + if (compactionLatch != null && compactionLatch.getCount() > 0) { + try { + compactionLatch.await(); + } catch (InterruptedException ie) { + } + } + if (CustomInnerRegionObserver.throwException.get()) { + if (exceptionLatch.getCount() > 0) { + try { + exceptionLatch.await(); + } catch (InterruptedException 
e) { + } + throw new IOException("throw exception"); + } + } + return nextRaw; + } + + @Override + public void close() throws IOException { + delegate.close(); + } + + @Override + public HRegionInfo getRegionInfo() { + return delegate.getRegionInfo(); + } + + @Override + public boolean isFilterDone() throws IOException { + return delegate.isFilterDone(); + } + + @Override + public boolean reseek(byte[] row) throws IOException { + return false; + } + + @Override + public long getMaxResultSize() { + return delegate.getMaxResultSize(); + } + + @Override + public long getMvccReadPoint() { + return delegate.getMvccReadPoint(); + } + + @Override + public int getBatch() { + return delegate.getBatch(); + } + } + + public static class CustomInnerRegionObserverWrapper extends CustomInnerRegionObserver { + @Override + public RegionScanner postScannerOpen(ObserverContext e, + Scan scan, RegionScanner s) throws IOException { + return new CustomScanner(s); + } + } + + public static class CustomInnerRegionObserver extends BaseRegionObserver { + static final AtomicLong sleepTime = new AtomicLong(0); + static final AtomicBoolean slowDownNext = new AtomicBoolean(false); + static final AtomicInteger countOfNext = new AtomicInteger(0); + static final AtomicInteger countOfGets = new AtomicInteger(0); + static final AtomicBoolean waitForGets = new AtomicBoolean(false); + static final AtomicBoolean throwException = new AtomicBoolean(false); + private static final AtomicReference cdl = new AtomicReference( + new CountDownLatch(0)); + + @Override + public boolean postScannerNext(ObserverContext e, + InternalScanner s, List results, int limit, boolean hasMore) throws IOException { + slowdownCode(e, false); + if (getLatch != null && getLatch.getCount() > 0) { + try { + getLatch.await(); + } catch (InterruptedException e1) { + } + } + return super.postScannerNext(e, s, results, limit, hasMore); + } + + @Override + public void postGetOp(ObserverContext e, Get get, + List results) throws IOException { + slowdownCode(e, true); + super.postGetOp(e, get, results); + } + + public static AtomicReference getCdl() { + return cdl; + } + + private void slowdownCode(final ObserverContext e, boolean isGet) { + CountDownLatch latch = getCdl().get(); + try { + System.out.println(latch.getCount() + " is the count " + isGet); + if (latch.getCount() > 0) { + if (isGet) { + countOfGets.incrementAndGet(); + } else { + countOfNext.incrementAndGet(); + } + LOG.info("Waiting for the counterCountDownLatch"); + latch.await(2, TimeUnit.MINUTES); // To help the tests to finish. 
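Reviewer note: the CountDownLatch plumbing in slowdownCode() is the backbone of every test above: reader threads park inside postGetOp/postScannerNext while the main thread inspects the cache, and are released only by countDown(). The driver-side shape of that handshake, condensed (names are the test's own; the exact wiring of the shared latch differs slightly per test):

    // Hedged condensation of the coordination pattern, not new API.
    CountDownLatch gate = new CountDownLatch(1);
    CustomInnerRegionObserver.getCdl().set(gate);   // coprocessor hooks will park on this latch
    GetThread[] getThreads = initiateGet(table, false, false);
    Thread.sleep(200);                              // give the readers time to reach the hook
    // ... walk the BlockCache here and assert the pinned ref counts ...
    gate.countDown();                               // release the parked readers
    for (GetThread t : getThreads) {
      t.join();
    }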
+ if (latch.getCount() > 0) { + throw new RuntimeException("Can't wait more"); + } + } + } catch (InterruptedException e1) { + LOG.error(e1); + } + } + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java index 55f8dda..d6e8784 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java @@ -43,7 +43,7 @@ public class TestByteBufferOutputStream { bbos.write(bytes); assertTrue(Bytes.compareTo(bytes, bbos.toByteArray(0, bytes.length)) == 0); bbos.flush(); - return bbos.getByteBuffer(); + return bbos.flipAndReturnBuffer(); } } } \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java index c410c13..7bc7b84 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java @@ -28,8 +28,13 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.EnumMap; +import java.util.HashMap; +import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.Map.Entry; import java.util.Random; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -290,6 +295,8 @@ public class TestCacheOnWrite { DataBlockEncoding encodingInCache = encoderType.getEncoder().getDataBlockEncoding(); + List<Long> cachedBlocksOffset = new ArrayList<Long>(); + Map<Long, HFileBlock> cachedBlocks = new HashMap<Long, HFileBlock>(); while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) { long onDiskSize = -1; if (prevBlock != null) { @@ -303,6 +310,8 @@ public class TestCacheOnWrite { offset); HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true); boolean isCached = fromCache != null; + cachedBlocksOffset.add(offset); + cachedBlocks.put(offset, fromCache); boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType()); assertTrue("shouldBeCached: " + shouldBeCached+ "\n" + "isCached: " + isCached + "\n" + @@ -355,6 +364,27 @@ public class TestCacheOnWrite { while (scanner.next()) { scanner.getKeyValue(); } + Iterator<Long> iterator = cachedBlocksOffset.iterator(); + while (iterator.hasNext()) { + Long entry = iterator.next(); + BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(), + entry); + HFileBlock hFileBlock = cachedBlocks.get(entry); + if (hFileBlock != null) { + // call returnBlock twice because for the isCached case the counter would have been incremented twice + blockCache.returnBlock(blockCacheKey, hFileBlock); + if (cacheCompressedData) { + if (this.compress == Compression.Algorithm.NONE + || cowType == CacheOnWriteType.INDEX_BLOCKS + || cowType == CacheOnWriteType.BLOOM_BLOCKS) { + blockCache.returnBlock(blockCacheKey, hFileBlock); + } + } else { + blockCache.returnBlock(blockCacheKey, hFileBlock); + } + } + } + scanner.returnCurrentBatch(); reader.close(); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java index 8f42e6b..9cb6150 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java +++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java @@ -329,7 +329,7 @@ public class TestHFile extends HBaseTestCase { private void readNumMetablocks(Reader reader, int n) throws IOException { for (int i = 0; i < n; i++) { - ByteBuffer actual = reader.getMetaBlock("HFileMeta" + i, false); + ByteBuffer actual = reader.getMetaBlock("HFileMeta" + i, false).getBufferWithoutHeader(); ByteBuffer expected = ByteBuffer.wrap(("something to test" + i).getBytes()); assertEquals("failed to match metadata", diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java index 8891a6a..4e2a7a8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java @@ -167,6 +167,10 @@ public class TestHFileBlockIndex { } @Override + public void returnBlock(HFileBlock block) { + } + + @Override public HFileBlock readBlock(long offset, long onDiskSize, boolean cacheBlock, boolean pread, boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java index 511f942..9aa23c7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java @@ -22,8 +22,10 @@ import static org.junit.Assert.assertTrue; import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Pair; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -63,8 +65,8 @@ public class TestByteBufferIOEngine { offset = (int) (Math.random() * (capacity - maxBlockSize)); } ioEngine.write(srcBuffer, offset); - ByteBuffer dstBuffer = ByteBuffer.allocate(blockSize); - ioEngine.read(dstBuffer, offset); + Pair pair = ioEngine.read(offset, blockSize); + ByteBuffer dstBuffer = pair.getFirst(); byte[] byteArray2 = dstBuffer.array(); for (int j = 0; j < byteArray.length; ++j) { assertTrue(byteArray[j] == byteArray2[j]); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java index 8306114..e8ea8cc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java @@ -24,8 +24,10 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; +import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType; import org.apache.hadoop.hbase.testclassification.IOTests; import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.Pair; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -47,9 +49,9 @@ public class TestFileIOEngine { for (int j = 0; j < data1.length; ++j) { data1[j] = (byte) (Math.random() * 255); } - byte[] data2 = new byte[len]; fileIOEngine.write(ByteBuffer.wrap(data1), offset); - 
fileIOEngine.read(ByteBuffer.wrap(data2), offset); + Pair pair = fileIOEngine.read(offset, len); + byte[] data2 = pair.getFirst().array(); for (int j = 0; j < data1.length; ++j) { assertTrue(data1[j] == data2[j]); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java index 32eb9f6..104c09c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java @@ -27,6 +27,7 @@ import static org.mockito.internal.verification.VerificationModeFactory.times; import java.io.IOException; import java.net.InetSocketAddress; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -47,6 +48,7 @@ import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler; import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.util.StringUtils; import org.junit.Test; @@ -137,7 +139,7 @@ public abstract class AbstractTestIPC { } @Override - public Pair call(BlockingService service, MethodDescriptor md, + public Triple call(BlockingService service, MethodDescriptor md, Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status) throws IOException { return super.call(service, md, param, cellScanner, receiveTime, status); @@ -204,8 +206,8 @@ public abstract class AbstractTestIPC { PayloadCarryingRpcController pcrc = new PayloadCarryingRpcController(CellUtil.createCellScanner(cells)); - Pair r = - client.call(pcrc, md, param, md.getOutputType().toProto(), User.getCurrent(), address); + Pair r = client.call(pcrc, md, param, md.getOutputType().toProto(), + User.getCurrent(), address); int index = 0; while (r.getSecond().advance()) { assertTrue(CELL.equals(r.getSecond().current())); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java index 8a6b092..3e8f0ea 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java @@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.testclassification.RPCTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Pair; +import org.apache.hadoop.hbase.util.Triple; import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -51,6 +52,7 @@ import org.mockito.Mockito; import java.io.IOException; import java.net.InetSocketAddress; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; @@ -136,7 +138,7 @@ public class TestRpcHandlerException { } @Override - public Pair call(BlockingService service, MethodDescriptor md, + public Triple call(BlockingService service, MethodDescriptor md, Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status) throws IOException { return super.call(service, md, param, cellScanner, receiveTime, status); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java index 2965071..03e6df7 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.io.hfile.BlockCacheKey; import org.apache.hadoop.hbase.io.hfile.CacheStats; import org.apache.hadoop.hbase.io.hfile.Cacheable; import org.apache.hadoop.hbase.io.hfile.CachedBlock; +import org.apache.hadoop.hbase.io.hfile.HFileBlock; import org.apache.hadoop.hbase.io.hfile.ResizableBlockCache; import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil; import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext; @@ -400,6 +401,11 @@ public class TestHeapMemoryManager { public BlockCache[] getBlockCaches() { return null; } + + @Override + public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) { + return true; + } } private static class MemstoreFlusherStub implements FlushRequester { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java index 0fa904c..3b47432 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueHeap.java @@ -190,12 +190,14 @@ public class TestKeyValueHeap extends HBaseTestCase { l1.add(new KeyValue(row1, fam1, col5, data)); l1.add(new KeyValue(row2, fam1, col1, data)); l1.add(new KeyValue(row2, fam1, col2, data)); - scanners.add(new Scanner(l1)); + Scanner s1 = new Scanner(l1); + scanners.add(s1); List l2 = new ArrayList(); l2.add(new KeyValue(row1, fam1, col1, data)); l2.add(new KeyValue(row1, fam1, col2, data)); - scanners.add(new Scanner(l2)); + Scanner s2 = new Scanner(l2); + scanners.add(s2); List l3 = new ArrayList(); l3.add(new KeyValue(row1, fam1, col3, data)); @@ -203,15 +205,26 @@ public class TestKeyValueHeap extends HBaseTestCase { l3.add(new KeyValue(row1, fam2, col1, data)); l3.add(new KeyValue(row1, fam2, col2, data)); l3.add(new KeyValue(row2, fam1, col3, data)); - scanners.add(new Scanner(l3)); + Scanner s3 = new Scanner(l3); + scanners.add(s3); + List l4 = new ArrayList(); - scanners.add(new Scanner(l4)); + Scanner s4 = new Scanner(l4); + scanners.add(s4); //Creating KeyValueHeap KeyValueHeap kvh = new KeyValueHeap(scanners, CellComparator.COMPARATOR); while(kvh.next() != null); + // Once the internal scanners go out of Cells, those will be removed from KVHeap's priority + // queue and added to a Set for lazy close. 
The actual close will happen only on KVHeap#close() + assertEquals(4, kvh.scannersForDelayedClose.size()); + assertTrue(kvh.scannersForDelayedClose.contains(s1)); + assertTrue(kvh.scannersForDelayedClose.contains(s2)); + assertTrue(kvh.scannersForDelayedClose.contains(s3)); + assertTrue(kvh.scannersForDelayedClose.contains(s4)); + kvh.close(); for(KeyValueScanner scanner : scanners) { assertTrue(((Scanner)scanner).isClosed()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java index a3c6685..8442def 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java @@ -425,14 +425,14 @@ public class TestScannerHeartbeatMessages { // Instantiate the custom heartbeat region scanners @Override protected RegionScanner instantiateRegionScanner(Scan scan, - List additionalScanners) throws IOException { + List additionalScanners, boolean cellBlock) throws IOException { if (scan.isReversed()) { if (scan.getFilter() != null) { scan.getFilter().setReversed(true); } - return new HeartbeatReversedRegionScanner(scan, additionalScanners, this); + return new HeartbeatReversedRegionScanner(scan, additionalScanners, this, cellBlock); } - return new HeartbeatRegionScanner(scan, additionalScanners, this); + return new HeartbeatRegionScanner(scan, additionalScanners, this, cellBlock); } } @@ -442,8 +442,8 @@ public class TestScannerHeartbeatMessages { */ private static class HeartbeatReversedRegionScanner extends ReversedRegionScannerImpl { HeartbeatReversedRegionScanner(Scan scan, List additionalScanners, - HRegion region) throws IOException { - super(scan, additionalScanners, region); + HRegion region, boolean cellBlock) throws IOException { + super(scan, additionalScanners, region, cellBlock); } @Override @@ -469,9 +469,9 @@ public class TestScannerHeartbeatMessages { * column family cells */ private static class HeartbeatRegionScanner extends RegionScannerImpl { - HeartbeatRegionScanner(Scan scan, List additionalScanners, HRegion region) - throws IOException { - region.super(scan, additionalScanners, region); + HeartbeatRegionScanner(Scan scan, List additionalScanners, HRegion region, + boolean cellBlock) throws IOException { + region.super(scan, additionalScanners, region, cellBlock); } @Override
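Reviewer note: the TestKeyValueHeap hunk above pins down the behavioural change on KeyValueHeap that much of this patch depends on: a child scanner that runs out of cells is no longer closed as soon as it goes dry (its blocks may still back Cells already handed to the caller) but is parked in the new scannersForDelayedClose set and closed together with the heap. Restated as a minimal sequence using the test's own fixtures:

    // Sketch restating the added assertions; s1..s4 and Scanner are the test's fixtures,
    // and scannersForDelayedClose is the package-visible set this patch adds to KeyValueHeap.
    KeyValueHeap kvh = new KeyValueHeap(scanners, CellComparator.COMPARATOR);
    while (kvh.next() != null) {
      // drain the heap; exhausted scanners move to kvh.scannersForDelayedClose
    }
    assertEquals(4, kvh.scannersForDelayedClose.size()); // all four parked, none closed yet
    kvh.close();                                          // only now are they actually closed
    for (KeyValueScanner scanner : scanners) {
      assertTrue(((Scanner) scanner).isClosed());
    }

Only at kvh.close() do the underlying scanners release their block references, which is presumably what lets the server keep cached blocks valid until the results of a batch have been fully consumed.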