.../org/apache/hadoop/hbase/client/HTable.java | 6 +-
.../client/RpcRetryingCallerWithReadReplicas.java | 2 +-
.../apache/hadoop/hbase/ipc/AbstractRpcClient.java | 4 +-
.../apache/hadoop/hbase/ipc/AsyncRpcChannel.java | 1 +
.../apache/hadoop/hbase/ipc/AsyncRpcClient.java | 4 +-
.../java/org/apache/hadoop/hbase/ipc/IPCUtil.java | 101 +-
.../hbase/ipc/PayloadCarryingRpcController.java | 72 ++
.../org/apache/hadoop/hbase/ipc/RpcClientImpl.java | 1 +
.../org/apache/hadoop/hbase/ipc/TestIPCUtil.java | 41 +-
.../hadoop/hbase/SharedMemoryBackedCell.java | 36 +
.../hadoop/hbase/io/ByteBufferOutputStream.java | 17 +-
.../apache/hadoop/hbase/io/hfile/HFileContext.java | 22 +-
.../hadoop/hbase/io/hfile/HFileContextBuilder.java | 9 +-
.../hadoop/hbase/io/HalfStoreFileReader.java | 11 +
.../apache/hadoop/hbase/io/hfile/BlockCache.java | 19 +
.../hadoop/hbase/io/hfile/CombinedBlockCache.java | 26 +
.../org/apache/hadoop/hbase/io/hfile/HFile.java | 10 +-
.../apache/hadoop/hbase/io/hfile/HFileBlock.java | 26 +-
.../hadoop/hbase/io/hfile/HFileBlockIndex.java | 126 +-
.../hadoop/hbase/io/hfile/HFileReaderImpl.java | 339 +++--
.../apache/hadoop/hbase/io/hfile/HFileScanner.java | 8 +-
.../hadoop/hbase/io/hfile/LruBlockCache.java | 12 +-
.../hadoop/hbase/io/hfile/MemcachedBlockCache.java | 6 +
.../hadoop/hbase/io/hfile/bucket/BucketCache.java | 84 +-
.../hbase/io/hfile/bucket/ByteBufferIOEngine.java | 9 +-
.../hadoop/hbase/io/hfile/bucket/FileIOEngine.java | 8 +-
.../hadoop/hbase/io/hfile/bucket/IOEngine.java | 10 +-
.../org/apache/hadoop/hbase/ipc/CallRunner.java | 22 +-
.../org/apache/hadoop/hbase/ipc/RpcServer.java | 61 +-
.../hadoop/hbase/ipc/RpcServerInterface.java | 10 +-
.../hadoop/hbase/quotas/DefaultOperationQuota.java | 5 +
.../hadoop/hbase/quotas/NoopOperationQuota.java | 5 +
.../apache/hadoop/hbase/quotas/OperationQuota.java | 6 +
.../hbase/regionserver/BatchReturnableScanner.java | 33 +
.../hadoop/hbase/regionserver/GetContext.java | 49 +
.../apache/hadoop/hbase/regionserver/HRegion.java | 188 ++-
.../hadoop/hbase/regionserver/KeyValueHeap.java | 53 +-
.../hadoop/hbase/regionserver/KeyValueScanner.java | 2 +-
.../hbase/regionserver/NonLazyKeyValueScanner.java | 4 +
.../hadoop/hbase/regionserver/RSRpcServices.java | 99 +-
.../hbase/regionserver/ReversedKeyValueHeap.java | 6 +-
.../regionserver/ReversedRegionScannerImpl.java | 5 +-
.../hadoop/hbase/regionserver/StoreFile.java | 12 +-
.../hbase/regionserver/StoreFileScanner.java | 19 +-
.../hadoop/hbase/regionserver/StoreScanner.java | 88 +-
.../hadoop/hbase/util/CompoundBloomFilter.java | 2 +
.../apache/hadoop/hbase/HBaseTestingUtility.java | 18 +
.../apache/hadoop/hbase/TestAcidGuarantees.java | 4 +
.../hbase/client/TestBlockEvictionFromClient.java | 1320 ++++++++++++++++++++
.../hbase/io/TestByteBufferOutputStream.java | 2 +-
.../hadoop/hbase/io/hfile/TestCacheOnWrite.java | 30 +
.../apache/hadoop/hbase/io/hfile/TestHFile.java | 2 +-
.../hadoop/hbase/io/hfile/TestHFileBlockIndex.java | 4 +
.../io/hfile/bucket/TestByteBufferIOEngine.java | 6 +-
.../hbase/io/hfile/bucket/TestFileIOEngine.java | 6 +-
.../apache/hadoop/hbase/ipc/AbstractTestIPC.java | 8 +-
.../hadoop/hbase/ipc/TestRpcHandlerException.java | 4 +-
.../hbase/regionserver/TestHeapMemoryManager.java | 6 +
.../regionserver/TestScannerHeartbeatMessages.java | 16 +-
59 files changed, 2694 insertions(+), 411 deletions(-)
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java
index 686aaa8..9eb811a 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java
@@ -38,6 +38,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
@@ -61,6 +62,7 @@ import org.apache.hadoop.hbase.ipc.RegionCoprocessorRpcChannel;
import org.apache.hadoop.hbase.ipc.RpcControllerFactory;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
+import org.apache.hadoop.hbase.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MultiRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutateRequest;
@@ -596,7 +598,7 @@ public class HTable implements HTableInterface {
try {
ClientProtos.GetResponse response = getStub().get(controller, request);
if (!response.hasResult()) return null;
- return ProtobufUtil.toResult(response.getResult());
+ return ProtobufUtil.toResult(response.getResult(), controller.cellScanner());
} catch (ServiceException se) {
throw ProtobufUtil.getRemoteException(se);
}
@@ -712,7 +714,7 @@ public class HTable implements HTableInterface {
try {
ClientProtos.GetResponse response = getStub().get(controller, request);
if (response == null) return null;
- return ProtobufUtil.toResult(response.getResult());
+ return ProtobufUtil.toResult(response.getResult(), controller.cellScanner());
} catch (ServiceException se) {
throw ProtobufUtil.getRemoteException(se);
}
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java
index 8f28881..f587a96 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java
@@ -161,7 +161,7 @@ public class RpcRetryingCallerWithReadReplicas {
if (response == null) {
return null;
}
- return ProtobufUtil.toResult(response.getResult());
+ return ProtobufUtil.toResult(response.getResult(), controller.cellScanner());
} catch (ServiceException se) {
throw ProtobufUtil.getRemoteException(se);
}
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java
index 9be370d..e59478f 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AbstractRpcClient.java
@@ -57,7 +57,6 @@ public abstract class AbstractRpcClient implements RpcClient {
protected final SocketAddress localAddr;
protected UserProvider userProvider;
- protected final IPCUtil ipcUtil;
protected final int minIdleTimeBeforeClose; // if the connection is idle for more than this
// time (in ms), it will be closed at any moment.
@@ -72,6 +71,7 @@ public abstract class AbstractRpcClient implements RpcClient {
protected final int connectTO;
protected final int readTO;
protected final int writeTO;
+ protected final IPCUtil ipcUtil;
/**
* Construct an IPC client for the cluster clusterId
@@ -89,10 +89,10 @@ public abstract class AbstractRpcClient implements RpcClient {
HConstants.DEFAULT_HBASE_CLIENT_PAUSE);
this.maxRetries = conf.getInt("hbase.ipc.client.connect.max.retries", 0);
this.tcpNoDelay = conf.getBoolean("hbase.ipc.client.tcpnodelay", true);
- this.ipcUtil = new IPCUtil(conf);
this.minIdleTimeBeforeClose = conf.getInt(IDLE_TIME, 120000); // 2 minutes
this.conf = conf;
+ this.ipcUtil = IPCUtil.createInstance(conf);
this.codec = getCodec();
this.compressor = getCompressor(conf);
this.fallbackAllowed = conf.getBoolean(IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java
index cfc8b1b..be8a375 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcChannel.java
@@ -48,6 +48,7 @@ import javax.security.sasl.SaslException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ipc.IPCUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.ConnectionClosingException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java
index 2e4d0a6..91e51fa 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/AsyncRpcClient.java
@@ -316,7 +316,7 @@ public class AsyncRpcClient extends AbstractRpcClient {
* @throws java.io.IOException on error on creation cell scanner
*/
public CellScanner createCellScanner(byte[] cellBlock) throws IOException {
- return ipcUtil.createCellScanner(this.codec, this.compressor, cellBlock);
+ return this.ipcUtil.createCellScanner(this.codec, this.compressor, cellBlock);
}
/**
@@ -327,7 +327,7 @@ public class AsyncRpcClient extends AbstractRpcClient {
* @throws java.io.IOException if block creation fails
*/
public ByteBuffer buildCellBlock(CellScanner cells) throws IOException {
- return ipcUtil.buildCellBlock(this.codec, this.compressor, cells);
+ return this.ipcUtil.buildCellBlock(this.codec, this.compressor, cells);
}
/**
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java
index 056ecbc..93effc7 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/IPCUtil.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
@@ -30,12 +31,12 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.io.BoundedByteBufferPool;
import org.apache.hadoop.hbase.io.ByteBufferOutputStream;
-import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.io.compress.CodecPool;
@@ -61,17 +62,21 @@ public class IPCUtil {
private final int cellBlockDecompressionMultiplier;
private final int cellBlockBuildingInitialBufferSize;
private final Configuration conf;
+ private static IPCUtil INSTANCE = null;
- public IPCUtil(final Configuration conf) {
- super();
+ private IPCUtil(Configuration conf) {
this.conf = conf;
- this.cellBlockDecompressionMultiplier =
- conf.getInt("hbase.ipc.cellblock.decompression.buffersize.multiplier", 3);
+ this.cellBlockDecompressionMultiplier = conf.getInt(
+ "hbase.ipc.cellblock.decompression.buffersize.multiplier", 3);
+ // Guess that 16k is a good size for rpc buffer. Could go bigger. See the
+ // TODO below in #buildCellBlock.
+ this.cellBlockBuildingInitialBufferSize = ClassSize.align(conf.getInt(
+ "hbase.ipc.cellblock.building.initial.buffersize", 16 * 1024));
+  }
- // Guess that 16k is a good size for rpc buffer. Could go bigger. See the TODO below in
- // #buildCellBlock.
- this.cellBlockBuildingInitialBufferSize =
- ClassSize.align(conf.getInt("hbase.ipc.cellblock.building.initial.buffersize", 16 * 1024));
+ public static IPCUtil createInstance(Configuration conf) {
+ INSTANCE = new IPCUtil(conf);
+ return INSTANCE;
}
/**
@@ -97,6 +102,10 @@ public class IPCUtil {
return buildCellBlock(codec, compressor, cellScanner, null);
}
+ public static IPCUtil getInstance() {
+ return INSTANCE;
+ }
+
/**
* Puts CellScanner Cells into a cell block using passed in codec and/or
* compressor.
@@ -118,24 +127,10 @@ public class IPCUtil {
throws IOException {
if (cellScanner == null) return null;
if (codec == null) throw new CellScannerButNoCodecException();
- int bufferSize = this.cellBlockBuildingInitialBufferSize;
- ByteBufferOutputStream baos = null;
- if (pool != null) {
- ByteBuffer bb = pool.getBuffer();
- bufferSize = bb.capacity();
- baos = new ByteBufferOutputStream(bb);
- } else {
- // Then we need to make our own to return.
- if (cellScanner instanceof HeapSize) {
- long longSize = ((HeapSize)cellScanner).heapSize();
- // Just make sure we don't have a size bigger than an int.
- if (longSize > Integer.MAX_VALUE) {
- throw new IOException("Size " + longSize + " > " + Integer.MAX_VALUE);
- }
- bufferSize = ClassSize.align((int)longSize);
- }
- baos = new ByteBufferOutputStream(bufferSize);
- }
+    // We don't know the actual cell block size beforehand, so just pass -1.
+    ByteBufferOutputStream baos = createCellBlockStream(-1, pool);
+ int bufferSize = baos.getByteBuffer().capacity();
OutputStream os = baos;
Compressor poolCompressor = null;
try {
@@ -164,7 +159,55 @@ public class IPCUtil {
"; up hbase.ipc.cellblock.building.initial.buffersize?");
}
}
- return baos.getByteBuffer();
+ return baos.flipAndReturnBuffer();
+ }
+
+ @SuppressWarnings("resource")
+ public ByteBuffer buildCellBlock(Codec codec, CompressionCodec compressor,
+      ArrayList<Cell> results, int bufferSize, BoundedByteBufferPool pool)
+ throws IOException {
+ if (results == null || results.isEmpty()) return null;
+ ByteBufferOutputStream baos = createCellBlockStream(bufferSize, pool);
+ Compressor poolCompressor = null;
+ OutputStream os = baos;
+ try {
+ if (compressor != null) {
+ if (compressor instanceof Configurable)
+ ((Configurable) compressor).setConf(conf);
+ poolCompressor = CodecPool.getCompressor(compressor);
+ os = compressor.createOutputStream(os, poolCompressor);
+ }
+ Codec.Encoder encoder = codec.getEncoder(os);
+ for (int i = 0; i < results.size(); i++) {
+ encoder.write(results.get(i));
+ }
+ encoder.flush();
+ } finally {
+ os.close();
+ if (poolCompressor != null)
+ CodecPool.returnCompressor(poolCompressor);
+ }
+ return baos.flipAndReturnBuffer();
+ }
+
+ private ByteBufferOutputStream createCellBlockStream(int bufferSize, BoundedByteBufferPool pool)
+ throws IOException {
+ if (bufferSize < 0) {
+ bufferSize = cellBlockBuildingInitialBufferSize;
+ }
+ ByteBufferOutputStream baos = null;
+ if (pool != null) {
+ // TODO make use of totalCellSize. We know the size needed already. We have
+ // to add API in BoundedByteBufferPool also.
+ ByteBuffer bb = pool.getBuffer();
+      // The pooled buffer dictates the size; the passed-in bufferSize is ignored in this branch.
+ bufferSize = bb.capacity();
+ baos = new ByteBufferOutputStream(bb);
+ } else {
+ // Then we need to make our own to return.
+ baos = new ByteBufferOutputStream(bufferSize);
+ }
+ return baos;
}
/**
@@ -208,7 +251,7 @@ public class IPCUtil {
this.cellBlockDecompressionMultiplier);
IOUtils.copy(cis, bbos);
bbos.close();
- ByteBuffer bb = bbos.getByteBuffer();
+ ByteBuffer bb = bbos.flipAndReturnBuffer();
is = new ByteArrayInputStream(bb.array(), 0, bb.limit());
} finally {
if (is != null) is.close();
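
For context (this snippet is not part of the patch), here is a minimal round-trip sketch of how the new singleton is meant to be driven: it assumes IPCUtil.createInstance() has been called once with the cluster Configuration, as AbstractRpcClient now does, and it uses the existing KeyValueCodec. The class name CellBlockRoundTrip is illustrative only.

    import java.nio.ByteBuffer;
    import java.util.Arrays;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.CellScanner;
    import org.apache.hadoop.hbase.CellUtil;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.codec.Codec;
    import org.apache.hadoop.hbase.codec.KeyValueCodec;
    import org.apache.hadoop.hbase.ipc.IPCUtil;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CellBlockRoundTrip {
      public static void main(String[] args) throws Exception {
        // Initialize the singleton once, as the RPC client/server bootstrap now does.
        IPCUtil util = IPCUtil.createInstance(new Configuration());
        Codec codec = new KeyValueCodec();

        Cell kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), Bytes.toBytes("value"));
        CellScanner cells = CellUtil.createCellScanner(Arrays.asList(kv).iterator());

        // Encode the cells into a cell block (no compression), then decode them back.
        ByteBuffer block = util.buildCellBlock(codec, null, cells);
        CellScanner decoded =
            IPCUtil.getInstance().createCellScanner(codec, null, block.array(), 0, block.limit());
        while (decoded.advance()) {
          System.out.println(CellUtil.getCellKeyAsString(decoded.current()));
        }
      }
    }
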
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java
index 09f4323..b288bb9 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/PayloadCarryingRpcController.java
@@ -17,14 +17,21 @@
*/
package org.apache.hadoop.hbase.ipc;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.List;
+import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScannable;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.io.BoundedByteBufferPool;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.codec.Codec;
+import org.apache.hadoop.io.compress.CompressionCodec;
/**
* Optionally carries Cells across the proxy/service interface down into ipc. On its
@@ -52,6 +59,30 @@ public class PayloadCarryingRpcController
*/
private CellScanner cellScanner;
+ /**
+   * The payload can be set either as a CellScanner or as a buffer of Cell data ready to be
+   * serialized over the wire. When the payload is set as a cell block buffer, metadata about the
+   * block, i.e. the number of cells it contains, must also be set.
+ */
+ private ByteBuffer cellBlock;
+ private int cellsCountInBlock;
+
+ private BoundedByteBufferPool reservoir = null;
+ private Codec codec = null;
+ private CompressionCodec compressionCodec = null;
+
+ public void setReservoir(final BoundedByteBufferPool pool) {
+ this.reservoir = pool;
+ }
+
+ public void setCodec(Codec codec) {
+ this.codec = codec;
+ }
+
+ public void setCompressionCodec(CompressionCodec codec) {
+ this.compressionCodec = codec;
+ }
+
public PayloadCarryingRpcController() {
this((CellScanner)null);
}
@@ -65,6 +96,31 @@ public class PayloadCarryingRpcController
}
/**
+   * Builds a cellblock from the passed cells.
+   * @param cells cells to be serialized into the cellblock
+   * @param totalCellsSize total serialized size of the cells, used to size the block buffer
+ * @throws IOException
+ */
+  public void setCellBlock(final ArrayList<Cell> cells, int totalCellsSize) throws IOException {
+ this.cellBlock = IPCUtil.getInstance().buildCellBlock(this.codec, this.compressionCodec,
+ cells, totalCellsSize, this.reservoir);
+ }
+
+ /**
+   * Builds a cellblock from the passed cell scanner.
+   * @param scanner the cell scanner whose cells are serialized into the cellblock
+ * @throws IOException
+ */
+ public void setCellBlock(final CellScanner scanner) throws IOException {
+ cellBlock = IPCUtil.getInstance().buildCellBlock(this.codec, this.compressionCodec, scanner,
+ reservoir);
+ }
+
+ public ByteBuffer getCellBlock() {
+ return this.cellBlock;
+ }
+
+ /**
* @return One-shot cell scanner (you cannot back it up and restart)
*/
public CellScanner cellScanner() {
@@ -75,6 +131,12 @@ public class PayloadCarryingRpcController
this.cellScanner = cellScanner;
}
+ public void clearPayload() {
+ this.cellScanner = null;
+ this.cellBlock = null;
+ this.cellsCountInBlock = 0;
+ }
+
/**
* @param priority Priority for this request; should fall roughly in the range
* {@link HConstants#NORMAL_QOS} to {@link HConstants#HIGH_QOS}
@@ -102,5 +164,15 @@ public class PayloadCarryingRpcController
super.reset();
priority = 0;
cellScanner = null;
+ this.cellBlock = null;
+ this.cellsCountInBlock = 0;
+ }
+
+ public void setCellsCountInBlock(int numberOfCells) {
+ this.cellsCountInBlock = numberOfCells;
+ }
+
+ public int getCellsCountInBlock() {
+ return this.cellsCountInBlock;
}
}
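
A rough illustration (not patch code) of how the server side is expected to drive this payload path; the reservoir, codec and compression codec are presumably wired in by RpcServer/RSRpcServices elsewhere in the patch, and the class name CellBlockPayloadSketch is made up for this sketch.

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValue;
    import org.apache.hadoop.hbase.codec.KeyValueCodec;
    import org.apache.hadoop.hbase.ipc.IPCUtil;
    import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
    import org.apache.hadoop.hbase.util.Bytes;

    public class CellBlockPayloadSketch {
      public static void main(String[] args) throws Exception {
        // The RPC layer creates the IPCUtil singleton at startup; mimic that here.
        IPCUtil.createInstance(new Configuration());

        PayloadCarryingRpcController controller = new PayloadCarryingRpcController();
        controller.setCodec(new KeyValueCodec());
        controller.setCompressionCodec(null); // no cell-block compression in this sketch
        // controller.setReservoir(...) would normally hand in the server's shared
        // BoundedByteBufferPool; without it IPCUtil allocates a fresh buffer.

        // Cells gathered by a get/scan, plus a running estimate of their serialized size.
        ArrayList<Cell> cells = new ArrayList<Cell>();
        KeyValue kv = new KeyValue(Bytes.toBytes("row"), Bytes.toBytes("f"),
            Bytes.toBytes("q"), Bytes.toBytes("value"));
        cells.add(kv);
        int totalCellsSize = kv.getLength();

        controller.setCellBlock(cells, totalCellsSize);
        controller.setCellsCountInBlock(cells.size());

        ByteBuffer payload = controller.getCellBlock(); // what gets written on the wire
        System.out.println("cell block bytes: " + payload.remaining());

        controller.clearPayload(); // once the response has been sent
      }
    }
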
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java
index 9a5fc14..9921f12 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ipc/RpcClientImpl.java
@@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.ipc.IPCUtil;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.codec.Codec;
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java
index 163be70..8ca9759 100644
--- a/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestIPCUtil.java
@@ -31,7 +31,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
-import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.ipc.IPCUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.codec.KeyValueCodec;
@@ -53,33 +53,31 @@ public class TestIPCUtil {
private static final Log LOG = LogFactory.getLog(TestIPCUtil.class);
- IPCUtil util;
@Before
public void before() {
- this.util = new IPCUtil(new Configuration());
+ IPCUtil.createInstance(new Configuration());
}
-
+
@Test
public void testBuildCellBlock() throws IOException {
- doBuildCellBlockUndoCellBlock(this.util, new KeyValueCodec(), null);
- doBuildCellBlockUndoCellBlock(this.util, new KeyValueCodec(), new DefaultCodec());
- doBuildCellBlockUndoCellBlock(this.util, new KeyValueCodec(), new GzipCodec());
+ doBuildCellBlockUndoCellBlock(new KeyValueCodec(), null);
+ doBuildCellBlockUndoCellBlock(new KeyValueCodec(), new DefaultCodec());
+ doBuildCellBlockUndoCellBlock(new KeyValueCodec(), new GzipCodec());
}
- static void doBuildCellBlockUndoCellBlock(final IPCUtil util,
- final Codec codec, final CompressionCodec compressor)
+ static void doBuildCellBlockUndoCellBlock(final Codec codec, final CompressionCodec compressor)
throws IOException {
- doBuildCellBlockUndoCellBlock(util, codec, compressor, 10, 1, false);
+ doBuildCellBlockUndoCellBlock(codec, compressor, 10, 1, false);
}
- static void doBuildCellBlockUndoCellBlock(final IPCUtil util, final Codec codec,
+ static void doBuildCellBlockUndoCellBlock(final Codec codec,
final CompressionCodec compressor, final int count, final int size, final boolean sized)
throws IOException {
Cell [] cells = getCells(count, size);
CellScanner cellScanner = sized? getSizedCellScanner(cells):
CellUtil.createCellScanner(Arrays.asList(cells).iterator());
- ByteBuffer bb = util.buildCellBlock(codec, compressor, cellScanner);
- cellScanner = util.createCellScanner(codec, compressor, bb.array(), 0, bb.limit());
+ ByteBuffer bb = IPCUtil.getInstance().buildCellBlock(codec, compressor, cellScanner);
+ cellScanner = IPCUtil.getInstance().createCellScanner(codec, compressor, bb.array(), 0, bb.limit());
int i = 0;
while (cellScanner.advance()) {
i++;
@@ -143,14 +141,14 @@ public class TestIPCUtil {
System.exit(errCode);
}
- private static void timerTests(final IPCUtil util, final int count, final int size,
+ private static void timerTests(final int count, final int size,
final Codec codec, final CompressionCodec compressor)
throws IOException {
final int cycles = 1000;
StopWatch timer = new StopWatch();
timer.start();
for (int i = 0; i < cycles; i++) {
- timerTest(util, timer, count, size, codec, compressor, false);
+ timerTest(timer, count, size, codec, compressor, false);
}
timer.stop();
LOG.info("Codec=" + codec + ", compression=" + compressor + ", sized=" + false +
@@ -158,17 +156,17 @@ public class TestIPCUtil {
timer.reset();
timer.start();
for (int i = 0; i < cycles; i++) {
- timerTest(util, timer, count, size, codec, compressor, true);
+ timerTest(timer, count, size, codec, compressor, true);
}
timer.stop();
LOG.info("Codec=" + codec + ", compression=" + compressor + ", sized=" + true +
", count=" + count + ", size=" + size + ", + took=" + timer.getTime() + "ms");
}
- private static void timerTest(final IPCUtil util, final StopWatch timer, final int count,
+ private static void timerTest(final StopWatch timer, final int count,
final int size, final Codec codec, final CompressionCodec compressor, final boolean sized)
throws IOException {
- doBuildCellBlockUndoCellBlock(util, codec, compressor, count, size, sized);
+ doBuildCellBlockUndoCellBlock(codec, compressor, count, size, sized);
}
/**
@@ -188,10 +186,9 @@ public class TestIPCUtil {
usage(1);
}
}
- IPCUtil util = new IPCUtil(HBaseConfiguration.create());
((Log4JLogger)IPCUtil.LOG).getLogger().setLevel(Level.ALL);
- timerTests(util, count, size, new KeyValueCodec(), null);
- timerTests(util, count, size, new KeyValueCodec(), new DefaultCodec());
- timerTests(util, count, size, new KeyValueCodec(), new GzipCodec());
+ timerTests(count, size, new KeyValueCodec(), null);
+ timerTests(count, size, new KeyValueCodec(), new DefaultCodec());
+ timerTests(count, size, new KeyValueCodec(), new GzipCodec());
}
}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/SharedMemoryBackedCell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/SharedMemoryBackedCell.java
new file mode 100644
index 0000000..b191bd5
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/SharedMemoryBackedCell.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+
+/**
+ * A Cell implementing this interface has its data backed by shared cache memory. When we read
+ * data from such a cache, we create a cell referring to the same memory and avoid any copy.
+ */
+@InterfaceAudience.Private
+public interface SharedMemoryBackedCell extends Cell {
+
+ /**
+   * Copy the cell's data from shared cache memory into a dedicated memory area and return it as
+   * a new Cell. This is needed when the Cell must remain usable even after the block backing it
+   * has been returned to the cache or evicted.
+ * @return A copy of the Cell
+ */
+ Cell copy();
+}
\ No newline at end of file
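
A short, hypothetical consumer-side sketch (not part of the patch) of the contract this interface implies: anything that must outlive the backing block should copy shared-memory-backed cells before the block is returned to the cache. The helper class SharedCellUtil below is invented for illustration.

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.SharedMemoryBackedCell;

    public final class SharedCellUtil {
      private SharedCellUtil() {
      }

      /** Returns a Cell that is safe to retain after the backing block has been returned. */
      public static Cell detach(Cell cell) {
        if (cell instanceof SharedMemoryBackedCell) {
          // Data lives in shared block-cache memory; take a private copy before the block's
          // reference is released.
          return ((SharedMemoryBackedCell) cell).copy();
        }
        return cell; // already backed by dedicated memory
      }

      /** Detaches every cell in a batch, e.g. before the scanner returns its blocks. */
      public static List<Cell> detachAll(List<Cell> cells) {
        List<Cell> copies = new ArrayList<Cell>(cells.size());
        for (Cell c : cells) {
          copies.add(detach(c));
        }
        return copies;
      }
    }
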
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java
index af12113..14f3135 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/ByteBufferOutputStream.java
@@ -25,6 +25,8 @@ import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
@@ -36,6 +38,7 @@ import org.apache.hadoop.hbase.util.Bytes;
@InterfaceStability.Evolving
public class ByteBufferOutputStream extends OutputStream {
+ private static final Log LOG = LogFactory.getLog(ByteBufferOutputStream.class);
protected ByteBuffer buf;
public ByteBufferOutputStream(int capacity) {
@@ -50,11 +53,11 @@ public class ByteBufferOutputStream extends OutputStream {
* @param bb ByteBuffer to use. If too small, will be discarded and a new one allocated in its
* place; i.e. the passed in BB may NOT BE RETURNED!! Minimally it will be altered. SIDE EFFECT!!
* If you want to get the newly allocated ByteBuffer, you'll need to pick it up when
- * done with this instance by calling {@link #getByteBuffer()}. All this encapsulation violation
+ * done with this instance by calling {@link #flipAndReturnBuffer()}. All this encapsulation violation
* is so we can recycle buffers rather than allocate each time; it can get expensive especially
* if the buffers are big doing allocations each time or having them undergo resizing because
* initial allocation was small.
- * @see #getByteBuffer()
+ * @see #flipAndReturnBuffer()
*/
public ByteBufferOutputStream(final ByteBuffer bb) {
this.buf = bb;
@@ -73,11 +76,19 @@ public class ByteBufferOutputStream extends OutputStream {
* This flips the underlying BB so be sure to use it _last_!
* @return ByteBuffer
*/
- public ByteBuffer getByteBuffer() {
+ public ByteBuffer flipAndReturnBuffer() {
buf.flip();
return buf;
}
+ /**
+   * Returns the underlying ByteBuffer as-is, without flipping it.
+ * @return ByteBuffer
+ */
+ public ByteBuffer getByteBuffer() {
+ return buf;
+ }
+
private void checkSizeAndGrow(int extra) {
if ( (buf.position() + extra) > buf.limit()) {
// size calculation is complex, because we could overflow negative,
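
To make the rename concrete, a small usage sketch (not part of the patch): getByteBuffer() now only exposes the underlying buffer without flipping it, while flipAndReturnBuffer() is the one-shot call to make once writing is finished. The class name FlipSketch is illustrative only.

    import java.nio.ByteBuffer;
    import org.apache.hadoop.hbase.io.ByteBufferOutputStream;
    import org.apache.hadoop.hbase.util.Bytes;

    public class FlipSketch {
      public static void main(String[] args) throws Exception {
        ByteBufferOutputStream bbos = new ByteBufferOutputStream(16 * 1024);
        bbos.write(Bytes.toBytes("first chunk"));

        // Peek at the buffer without disturbing the write position.
        System.out.println("capacity=" + bbos.getByteBuffer().capacity());

        bbos.write(Bytes.toBytes(", second chunk"));

        // Done writing: flip once and read. Do not write to bbos after this.
        ByteBuffer readable = bbos.flipAndReturnBuffer();
        System.out.println("written=" + readable.remaining() + " bytes");
      }
    }
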
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
index 5edd47d..a1f105a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContext.java
@@ -56,6 +56,7 @@ public class HFileContext implements HeapSize, Cloneable {
/** Encryption algorithm and key used */
private Encryption.Context cryptoContext = Encryption.Context.NONE;
private long fileCreateTime;
+ private String hfileName;
//Empty constructor. Go with setters
public HFileContext() {
@@ -77,12 +78,13 @@ public class HFileContext implements HeapSize, Cloneable {
this.encoding = context.encoding;
this.cryptoContext = context.cryptoContext;
this.fileCreateTime = context.fileCreateTime;
+ this.hfileName = context.hfileName;
}
public HFileContext(boolean useHBaseChecksum, boolean includesMvcc, boolean includesTags,
Compression.Algorithm compressAlgo, boolean compressTags, ChecksumType checksumType,
int bytesPerChecksum, int blockSize, DataBlockEncoding encoding,
- Encryption.Context cryptoContext, long fileCreateTime) {
+ Encryption.Context cryptoContext, long fileCreateTime, String hfileName) {
this.usesHBaseChecksum = useHBaseChecksum;
this.includesMvcc = includesMvcc;
this.includesTags = includesTags;
@@ -96,6 +98,7 @@ public class HFileContext implements HeapSize, Cloneable {
}
this.cryptoContext = cryptoContext;
this.fileCreateTime = fileCreateTime;
+ this.hfileName = hfileName;
}
/**
@@ -187,6 +190,14 @@ public class HFileContext implements HeapSize, Cloneable {
this.cryptoContext = cryptoContext;
}
+ public String getHFileName() {
+ return this.hfileName;
+ }
+
+ public void setHFileName(String name) {
+ this.hfileName = name;
+ }
+
/**
* HeapSize implementation
* NOTE : The heapsize should be altered as and when new state variable are added
@@ -196,11 +207,14 @@ public class HFileContext implements HeapSize, Cloneable {
public long heapSize() {
long size = ClassSize.align(ClassSize.OBJECT +
// Algorithm reference, encodingon, checksumtype, Encryption.Context reference
- 4 * ClassSize.REFERENCE +
+ 5 * ClassSize.REFERENCE +
2 * Bytes.SIZEOF_INT +
// usesHBaseChecksum, includesMvcc, includesTags and compressTags
4 * Bytes.SIZEOF_BOOLEAN +
Bytes.SIZEOF_LONG);
+ if (this.hfileName != null) {
+ size += ClassSize.STRING + this.hfileName.length();
+ }
return size;
}
@@ -227,6 +241,10 @@ public class HFileContext implements HeapSize, Cloneable {
sb.append(" compressAlgo="); sb.append(compressAlgo);
sb.append(" compressTags="); sb.append(compressTags);
sb.append(" cryptoContext=[ "); sb.append(cryptoContext); sb.append(" ]");
+ if (hfileName != null) {
+ sb.append(" name=");
+ sb.append(hfileName);
+ }
sb.append(" ]");
return sb.toString();
}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
index a903974..19a323a 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileContextBuilder.java
@@ -53,6 +53,8 @@ public class HFileContextBuilder {
private Encryption.Context cryptoContext = Encryption.Context.NONE;
private long fileCreateTime = 0;
+ private String hfileName = null;
+
public HFileContextBuilder withHBaseCheckSum(boolean useHBaseCheckSum) {
this.usesHBaseChecksum = useHBaseCheckSum;
return this;
@@ -108,9 +110,14 @@ public class HFileContextBuilder {
return this;
}
+ public HFileContextBuilder withPathName(String name) {
+ this.hfileName = name;
+ return this;
+ }
+
public HFileContext build() {
return new HFileContext(usesHBaseChecksum, includesMvcc, includesTags, compression,
compressTags, checksumType, bytesPerChecksum, blocksize, encoding, cryptoContext,
- fileCreateTime);
+ fileCreateTime, hfileName);
}
}
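
Roughly how the new name plumbing is intended to be used (a sketch, not patch code; the store file name below is made up): the builder tags the context with the hfile name so that, later on, HFileReaderImpl.returnBlock() can rebuild the BlockCacheKey from HFileContext.getHFileName() plus the block offset.

    import org.apache.hadoop.hbase.io.compress.Compression;
    import org.apache.hadoop.hbase.io.hfile.HFileContext;
    import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;

    public class HFileContextNameSketch {
      public static void main(String[] args) {
        HFileContext context = new HFileContextBuilder()
            .withBlockSize(64 * 1024)
            .withCompression(Compression.Algorithm.NONE)
            .withPathName("0f3a4e1db2c94d5e8a7b") // hypothetical store file name
            .build();

        // The name travels with every block read under this context and is used to build
        // the BlockCacheKey when the block is returned to the cache.
        System.out.println(context.getHFileName());
      }
    }
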
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
index a95da7b..6b969f9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java
@@ -288,6 +288,17 @@ public class HalfStoreFileReader extends StoreFile.Reader {
public Cell getNextIndexedKey() {
return null;
}
+
+ @Override
+ public void returnCurrentBatch() {
+ this.delegate.returnCurrentBatch();
+ }
+
+ @Override
+ public void close() {
+ this.delegate.close();
+ }
+
};
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index 57c4be9..0baa6b0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -116,4 +116,23 @@ public interface BlockCache extends Iterable {
* @return The list of sub blockcaches that make up this one; returns null if no sub caches.
*/
BlockCache [] getBlockCaches();
+
+ /**
+   * Called when a block is no longer in use, so the cache can release whatever it holds for it
+   * (for example a reference count).
+   * @param cacheKey the cache key of the block
+   * @param block the block being returned
+   * @return true if the block was returned to this cache, false otherwise
+ */
+ boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block);
+
+ /**
+   * An enum that indicates whether a block retrieved from the cache is backed by shared cache
+   * memory or not. For shared memory we try to serve reads from that cache without doing any
+   * copy, so the cells belonging to such blocks cannot be evicted until the results are
+   * consumed. The non-shared memory case has no such constraint.
+ */
+ public static enum MemoryType {
+ SHARED_MEM, NON_SHARED_MEM;
+ }
}
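
The acquire/return contract this adds can be summarised with a hedged sketch (not patch code): every block obtained from the cache should be handed back once its cells are no longer referenced, mirroring what HFileReaderImpl.returnBlock() does in this patch. The cache, cache key and cast below stand in for whatever the caller already has in hand.

    import org.apache.hadoop.hbase.io.hfile.BlockCache;
    import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
    import org.apache.hadoop.hbase.io.hfile.HFileBlock;

    public class BlockReturnSketch {
      // 'cache' and 'cacheKey' are assumed to come from the caller's read path.
      static void readAndReturn(BlockCache cache, BlockCacheKey cacheKey) {
        HFileBlock block = (HFileBlock) cache.getBlock(cacheKey, true, false, true);
        if (block == null) {
          return; // not cached
        }
        try {
          // ... read cells out of the block; copy anything that must outlive it ...
        } finally {
          // Release the reference so a shared-memory (e.g. bucket cache) block can be evicted again.
          cache.returnBlock(cacheKey, block);
        }
      }
    }
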
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 7725cf9..affda8b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -23,8 +23,11 @@ import java.util.Iterator;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
+import com.google.common.annotations.VisibleForTesting;
+
/**
* CombinedBlockCache is an abstraction layer that combines
@@ -219,4 +222,27 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
public void setMaxSize(long size) {
this.lruCache.setMaxSize(size);
}
+
+ @Override
+ public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) {
+ assert block.getCacheType() != CacheType.NOT_CACHED;
+ if (block.getCacheType() == CacheType.L1_CACHED) {
+      // At the time this block was served it was in the L1 cache. Even if it has since been
+      // transferred to the L2 cache, there is no need to contact the L2 cache here.
+ return this.lruCache.returnBlock(cacheKey, block);
+ } else if (block.getCacheType() == CacheType.L2_CACHED) {
+ return this.l2Cache.returnBlock(cacheKey, block);
+ }
+ return true;
+ }
+
+ @VisibleForTesting
+ public int getRefCount(BlockCacheKey cacheKey) {
+ // Check this
+ if (this.l2Cache != null) {
+ return ((BucketCache) this.l2Cache).getRefCount(cacheKey);
+ } else {
+ return 0;
+ }
+ }
}
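
getRefCount() is test-only plumbing; a typical assertion, sketched here under the assumption that the region server is configured with a CombinedBlockCache (presumably along the lines of what TestBlockEvictionFromClient exercises), is that once a get/scan has consumed its results and returned its blocks, the bucket-cache reference count drops back to zero. The class and method names below are invented for the sketch.

    import static org.junit.Assert.assertEquals;

    import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
    import org.apache.hadoop.hbase.io.hfile.CacheConfig;
    import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache;

    public class RefCountAssertionSketch {
      // 'cacheConf' and 'cacheKey' would come from the test's region/store under inspection.
      static void assertBlockReleased(CacheConfig cacheConf, BlockCacheKey cacheKey) {
        CombinedBlockCache cache = (CombinedBlockCache) cacheConf.getBlockCache();
        // After the scan/get results are consumed and blocks returned, no reader should still
        // hold a reference to the block in the L2 (bucket) cache.
        assertEquals(0, cache.getRefCount(cacheKey));
      }
    }
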
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 6c8260d..b2975b8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -26,7 +26,6 @@ import java.io.DataOutputStream;
import java.io.IOException;
import java.io.SequenceInputStream;
import java.net.InetSocketAddress;
-import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
@@ -40,7 +39,6 @@ import java.util.concurrent.atomic.AtomicLong;
import org.apache.hadoop.hbase.util.ByteStringer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
@@ -55,6 +53,7 @@ import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
@@ -373,6 +372,11 @@ public class HFile {
final boolean updateCacheMetrics, BlockType expectedBlockType,
DataBlockEncoding expectedDataBlockEncoding)
throws IOException;
+ /**
+   * Return the given block to the cache, if it was obtained from the cache.
+ * @param block Block to be returned.
+ */
+ void returnBlock(HFileBlock block);
}
/** An interface used by clients to open and iterate an {@link HFile}. */
@@ -388,7 +392,7 @@ public class HFile {
HFileScanner getScanner(boolean cacheBlocks, final boolean pread, final boolean isCompaction);
- ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
+ HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock) throws IOException;
Map loadFileInfo() throws IOException;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index a64bb94..d53d949 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -41,6 +41,7 @@ import org.apache.hadoop.hbase.io.encoding.HFileBlockDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultDecodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockDefaultEncodingContext;
import org.apache.hadoop.hbase.io.encoding.HFileBlockEncodingContext;
+import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
@@ -192,6 +193,9 @@ public class HFileBlock implements Cacheable {
*/
private int nextBlockOnDiskSizeWithHeader = -1;
+ private CacheType cacheType = CacheType.NOT_CACHED;
+ private MemoryType memType = MemoryType.NON_SHARED_MEM;
+
/**
* Creates a new {@link HFile} block from the given fields. This constructor
* is mostly used when the block data has already been read and uncompressed,
@@ -637,7 +641,7 @@ public class HFileBlock implements Cacheable {
long size = ClassSize.align(
ClassSize.OBJECT +
// Block type, byte buffer and meta references
- 3 * ClassSize.REFERENCE +
+ 5 * ClassSize.REFERENCE +
// On-disk size, uncompressed size, and next block's on-disk size
// bytePerChecksum and onDiskDataSize
4 * Bytes.SIZEOF_INT +
@@ -1871,6 +1875,26 @@ public class HFileBlock implements Cacheable {
return this.fileContext;
}
+ public void setCacheType(CacheType cacheType) {
+ this.cacheType = cacheType;
+ }
+
+ public void setMemoryType(MemoryType memType) {
+ this.memType = memType;
+ }
+
+ public MemoryType getMemoryType() {
+ return this.memType;
+ }
+
+ public CacheType getCacheType() {
+ return this.cacheType;
+ }
+
+ public static enum CacheType {
+ L1_CACHED, L2_CACHED, NOT_CACHED;
+ }
+
/**
* Convert the contents of the block header into a human readable string.
* This is mostly helpful for debugging. This assumes that the block
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index e6e1fff..fbc14c0 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicReference;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.hbase.Cell;
@@ -40,6 +39,7 @@ import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.HFile.CachingBlockReader;
@@ -235,76 +235,85 @@ public class HFileBlockIndex {
int lookupLevel = 1; // How many levels deep we are in our lookup.
int index = -1;
- HFileBlock block;
+ HFileBlock block = null;
+ boolean dataBlock = false;
while (true) {
-
- if (currentBlock != null && currentBlock.getOffset() == currentOffset)
- {
- // Avoid reading the same block again, even with caching turned off.
- // This is crucial for compaction-type workload which might have
- // caching turned off. This is like a one-block cache inside the
- // scanner.
- block = currentBlock;
- } else {
- // Call HFile's caching block reader API. We always cache index
- // blocks, otherwise we might get terrible performance.
- boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
- BlockType expectedBlockType;
- if (lookupLevel < searchTreeLevel - 1) {
- expectedBlockType = BlockType.INTERMEDIATE_INDEX;
- } else if (lookupLevel == searchTreeLevel - 1) {
- expectedBlockType = BlockType.LEAF_INDEX;
+ try {
+ if (currentBlock != null && currentBlock.getOffset() == currentOffset) {
+ // Avoid reading the same block again, even with caching turned off.
+ // This is crucial for compaction-type workload which might have
+ // caching turned off. This is like a one-block cache inside the
+ // scanner.
+ block = currentBlock;
} else {
- // this also accounts for ENCODED_DATA
- expectedBlockType = BlockType.DATA;
+ // Call HFile's caching block reader API. We always cache index
+ // blocks, otherwise we might get terrible performance.
+ boolean shouldCache = cacheBlocks || (lookupLevel < searchTreeLevel);
+ BlockType expectedBlockType;
+ if (lookupLevel < searchTreeLevel - 1) {
+ expectedBlockType = BlockType.INTERMEDIATE_INDEX;
+ } else if (lookupLevel == searchTreeLevel - 1) {
+ expectedBlockType = BlockType.LEAF_INDEX;
+ } else {
+ // this also accounts for ENCODED_DATA
+ expectedBlockType = BlockType.DATA;
+ }
+ block = cachingBlockReader.readBlock(currentOffset, currentOnDiskSize, shouldCache,
+ pread, isCompaction, true, expectedBlockType, expectedDataBlockEncoding);
}
- block = cachingBlockReader.readBlock(currentOffset,
- currentOnDiskSize, shouldCache, pread, isCompaction, true,
- expectedBlockType, expectedDataBlockEncoding);
- }
- if (block == null) {
- throw new IOException("Failed to read block at offset " +
- currentOffset + ", onDiskSize=" + currentOnDiskSize);
- }
+ if (block == null) {
+ throw new IOException("Failed to read block at offset " + currentOffset
+ + ", onDiskSize=" + currentOnDiskSize);
+ }
- // Found a data block, break the loop and check our level in the tree.
- if (block.getBlockType().isData()) {
- break;
- }
+ // Found a data block, break the loop and check our level in the tree.
+ if (block.getBlockType().isData()) {
+ dataBlock = true;
+ break;
+ }
- // Not a data block. This must be a leaf-level or intermediate-level
- // index block. We don't allow going deeper than searchTreeLevel.
- if (++lookupLevel > searchTreeLevel) {
- throw new IOException("Search Tree Level overflow: lookupLevel="+
- lookupLevel + ", searchTreeLevel=" + searchTreeLevel);
- }
+ // Not a data block. This must be a leaf-level or intermediate-level
+ // index block. We don't allow going deeper than searchTreeLevel.
+ if (++lookupLevel > searchTreeLevel) {
+ throw new IOException("Search Tree Level overflow: lookupLevel=" + lookupLevel
+ + ", searchTreeLevel=" + searchTreeLevel);
+ }
- // Locate the entry corresponding to the given key in the non-root
- // (leaf or intermediate-level) index block.
- ByteBuffer buffer = block.getBufferWithoutHeader();
- index = locateNonRootIndexEntry(buffer, key, comparator);
- if (index == -1) {
- // This has to be changed
- // For now change this to key value
- KeyValue kv = KeyValueUtil.ensureKeyValue(key);
- throw new IOException("The key "
- + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
- + " is before the" + " first key of the non-root index block "
- + block);
- }
+ // Locate the entry corresponding to the given key in the non-root
+ // (leaf or intermediate-level) index block.
+ ByteBuffer buffer = block.getBufferWithoutHeader();
+ index = locateNonRootIndexEntry(buffer, key, comparator);
+ if (index == -1) {
+ // This has to be changed
+ // For now change this to key value
+ KeyValue kv = KeyValueUtil.ensureKeyValue(key);
+ throw new IOException("The key "
+ + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
+ + " is before the" + " first key of the non-root index block " + block);
+ }
- currentOffset = buffer.getLong();
- currentOnDiskSize = buffer.getInt();
+ currentOffset = buffer.getLong();
+ currentOnDiskSize = buffer.getInt();
- // Only update next indexed key if there is a next indexed key in the current level
- byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
- if (tmpNextIndexedKey != null) {
- nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey);
+ // Only update next indexed key if there is a next indexed key in the
+ // current level
+ byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
+ if (tmpNextIndexedKey != null) {
+ nextIndexedKey = new KeyValue.KeyOnlyKeyValue(tmpNextIndexedKey);
+ }
+ } finally {
+ if (!dataBlock) {
+ // Return the block immediately if it is not the
+ // data block
+ cachingBlockReader.returnBlock(block);
+ }
}
}
if (lookupLevel != searchTreeLevel) {
+ assert dataBlock == true;
+ cachingBlockReader.returnBlock(block);
throw new IOException("Reached a data block at level " + lookupLevel +
" but the number of levels is " + searchTreeLevel);
}
@@ -349,6 +358,7 @@ public class HFileBlockIndex {
int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset
+ SECONDARY_INDEX_ENTRY_OVERHEAD;
targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen);
+ cachingBlockReader.returnBlock(midLeafBlock);
} else {
// The middle of the root-level index.
targetMidKey = blockKeys[rootCount / 2];
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index 4d1881d..fdf7fa6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -39,7 +39,7 @@ import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.NoTagsKeyValue;
-import org.apache.hadoop.hbase.KeyValue.KVComparator;
+import org.apache.hadoop.hbase.SharedMemoryBackedCell;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.compress.Compression;
@@ -56,6 +56,7 @@ import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.IdLock;
import org.apache.hadoop.io.WritableUtils;
import org.apache.htrace.Trace;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType;
import org.apache.htrace.TraceScope;
import com.google.common.annotations.VisibleForTesting;
@@ -256,6 +257,14 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
}
HFileBlock block = readBlock(offset, onDiskSize, true, false, false, false,
null, null);
+          // No need to update the current block with the fetcher. Ideally the readBlock
+          // here won't find the block in the cache; we call readBlock so that the block
+          // data is read from the FS and cached in the block cache. Since the block still
+          // has to be returned, return it immediately.
+ returnBlock(block);
prevBlock = block;
offset += block.getOnDiskSizeWithHeader();
}
@@ -335,6 +344,15 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
return fileSize;
}
+ @Override
+ public void returnBlock(HFileBlock block) {
+ BlockCache blockCache = this.cacheConf.getBlockCache();
+ if (blockCache != null && block != null && block.getCacheType() != CacheType.NOT_CACHED) {
+ BlockCacheKey cacheKey = new BlockCacheKey(this.getFileContext().getHFileName(),
+ block.getOffset());
+ blockCache.returnBlock(cacheKey, block);
+ }
+ }
/**
* @return the first key in the file. May be null if file has no entries. Note
* that this is not the first row key, but rather the byte form of the
@@ -446,8 +464,6 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
protected final HFile.Reader reader;
private int currTagsLen;
- protected HFileBlock block;
-
/**
* The next indexed key is to keep track of the indexed key of the next data block.
* If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
@@ -456,6 +472,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
* If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
*/
protected Cell nextIndexedKey;
+ // Current block being used
+ protected HFileBlock curBlock;
+ // Previous blocks that were used in the course of the read
+    protected ArrayList<HFileBlock> prevBlocks = null;
public HFileScannerImpl(final HFile.Reader reader, final boolean cacheBlocks,
final boolean pread, final boolean isCompaction) {
@@ -463,8 +483,43 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
this.cacheBlocks = cacheBlocks;
this.pread = pread;
this.isCompaction = isCompaction;
+      prevBlocks = new ArrayList<HFileBlock>();
+ }
+
+ void addToCurrentBlock(HFileBlock block) {
+ if (block != null && this.curBlock != null && block.getOffset() == this.curBlock.getOffset()) {
+ return;
+ }
+ if (this.curBlock != null) {
+ prevBlocks.add(this.curBlock);
+ }
+ this.curBlock = block;
+ }
+
+ void reset() {
+ if (this.curBlock != null) {
+ this.prevBlocks.add(this.curBlock);
+ }
+ this.curBlock = null;
+ }
+
+ private void returnBlockToCache(HFileBlock block) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Returning the block : " + block);
+ }
+ this.reader.returnBlock(block);
}
+ void returnBlocks(boolean returnAll) {
+ for (int i = 0; i < this.prevBlocks.size(); i++) {
+ returnBlockToCache(this.prevBlocks.get(i));
+ }
+ this.prevBlocks.clear();
+ if (returnAll && this.curBlock != null) {
+ returnBlockToCache(this.curBlock);
+ this.curBlock = null;
+ }
+ }
@Override
public boolean isSeeked(){
return blockBuffer != null;
@@ -493,6 +548,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
return kvBufSize;
}
+ @Override
+ public void close() {
+ this.returnBlocks(true);
+ }
+
protected int getNextCellStartPosition() {
int nextKvPos = blockBuffer.position() + KEY_VALUE_LEN_SIZE + currKeyLen + currValueLen
+ currMemstoreTSLen;
@@ -528,9 +588,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
private final void checkTagsLen() {
if (checkLen(this.currTagsLen)) {
- throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen +
- ". Block offset: " + block.getOffset() + ", block length: " + this.blockBuffer.limit() +
- ", position: " + this.blockBuffer.position() + " (without header).");
+ throw new IllegalStateException("Invalid currTagsLen " + this.currTagsLen
+ + ". Block offset: " + curBlock.getOffset() + ", block length: "
+ + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position()
+ + " (without header).");
}
}
@@ -607,7 +668,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
|| vlen > blockBuffer.limit()) {
throw new IllegalStateException("Invalid klen " + klen + " or vlen "
+ vlen + ". Block offset: "
- + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
+ + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
+ blockBuffer.position() + " (without header).");
}
ByteBufferUtils.skip(blockBuffer, klen + vlen);
@@ -616,7 +677,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
tlen = ((blockBuffer.get() & 0xff) << 8) ^ (blockBuffer.get() & 0xff);
if (tlen < 0 || tlen > blockBuffer.limit()) {
throw new IllegalStateException("Invalid tlen " + tlen + ". Block offset: "
- + block.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
+ + curBlock.getOffset() + ", block length: " + blockBuffer.limit() + ", position: "
+ blockBuffer.position() + " (without header).");
}
ByteBufferUtils.skip(blockBuffer, tlen);
@@ -643,8 +704,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
throw new IllegalStateException("blockSeek with seekBefore "
+ "at the first key of the block: key="
+ CellUtil.getCellKeyAsString(key)
- + ", blockOffset=" + block.getOffset() + ", onDiskSize="
- + block.getOnDiskSizeWithHeader());
+ + ", blockOffset=" + curBlock.getOffset() + ", onDiskSize="
+ + curBlock.getOnDiskSizeWithHeader());
}
blockBuffer.position(blockBuffer.position() - lastKeyValueSize);
readKeyValueLen();
@@ -708,15 +769,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
// The comparison with no_next_index_key has to be checked
if (this.nextIndexedKey != null &&
(this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY || reader
- .getComparator().compareKeyIgnoresMvcc(key, nextIndexedKey) < 0)) {
+ .getComparator().compare(key, nextIndexedKey) < 0)) {
// The reader shall continue to scan the current data block instead
// of querying the
// block index as long as it knows the target key is strictly
// smaller than
// the next indexed key or the current data block is the last data
// block.
- return loadBlockAndSeekToKey(this.block, nextIndexedKey, false, key, false);
+ return loadBlockAndSeekToKey(this.curBlock, nextIndexedKey, false, key,
+ false);
}
+
}
}
// Don't rewind on a reseek operation, because reseek implies that we are
@@ -740,10 +803,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
*/
public int seekTo(Cell key, boolean rewind) throws IOException {
HFileBlockIndex.BlockIndexReader indexReader = reader.getDataBlockIndexReader();
- BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key, block,
- cacheBlocks, pread, isCompaction, getEffectiveDataBlockEncoding());
+ BlockWithScanInfo blockWithScanInfo = indexReader.loadDataBlockWithScanInfo(key,
+ curBlock, cacheBlocks, pread, isCompaction,
+ getEffectiveDataBlockEncoding());
if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
- // This happens if the key e.g. falls before the beginning of the file.
+ // This happens if the key e.g. falls before the beginning of the
+ // file.
return -1;
}
return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
@@ -752,17 +817,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
@Override
public boolean seekBefore(Cell key) throws IOException {
- HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key, block,
- cacheBlocks, pread, isCompaction, reader.getEffectiveEncodingInCache(isCompaction));
+ HFileBlock seekToBlock = reader.getDataBlockIndexReader().seekToDataBlock(key,
+ curBlock, cacheBlocks, pread, isCompaction,
+ reader.getEffectiveEncodingInCache(isCompaction));
if (seekToBlock == null) {
return false;
}
ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock);
- if (reader.getComparator()
- .compareKeyIgnoresMvcc(
- new KeyValue.KeyOnlyKeyValue(firstKey.array(), firstKey.arrayOffset(),
- firstKey.limit()), key) >= 0) {
+ if (reader.getComparator().compareKeyIgnoresMvcc(
+ new KeyValue.KeyOnlyKeyValue(firstKey.array(), firstKey.arrayOffset(), firstKey.limit()),
+ key) >= 0) {
long previousBlockOffset = seekToBlock.getPrevBlockOffset();
// The key we are interested in
if (previousBlockOffset == -1) {
@@ -773,6 +838,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
// It is important that we compute and pass onDiskSize to the block
// reader so that it does not have to read the header separately to
// figure out the size.
+ reader.returnBlock(seekToBlock);
seekToBlock = reader.readBlock(previousBlockOffset,
seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
pread, isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
@@ -793,17 +859,17 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
*/
protected HFileBlock readNextDataBlock() throws IOException {
long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
- if (block == null)
+ if (curBlock == null)
return null;
- HFileBlock curBlock = block;
+ HFileBlock curBlock = this.curBlock;
do {
if (curBlock.getOffset() >= lastDataBlockOffset)
return null;
if (curBlock.getOffset() < 0) {
- throw new IOException("Invalid block file offset: " + block);
+ throw new IOException("Invalid block file offset: " + curBlock);
}
// We are reading the next block without block type validation, because
@@ -812,6 +878,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
+ curBlock.getOnDiskSizeWithHeader(),
curBlock.getNextBlockOnDiskSizeWithHeader(), cacheBlocks, pread,
isCompaction, true, null, getEffectiveDataBlockEncoding());
+ if (curBlock != null && !curBlock.getBlockType().isData()) {
+        // Whatever block we just read is returned right away unless it is a
+        // data block; only data blocks are handed on to the scanner.
+ reader.returnBlock(curBlock);
+ }
} while (!curBlock.getBlockType().isData());
return curBlock;
@@ -823,23 +894,54 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
@Override
public Cell getKeyValue() {
- if (!isSeeked())
- return null;
+ if (!isSeeked()) return null;
- if(currTagsLen > 0) {
- KeyValue ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
- + blockBuffer.position(), getCellBufSize());
- if (this.reader.shouldIncludeMemstoreTS()) {
- ret.setSequenceId(currMemstoreTS);
+ KeyValue ret = null;
+ if (currTagsLen > 0) {
+ if (this.curBlock.getMemoryType() == BlockCache.MemoryType.SHARED_MEM) {
+ ret = new SharedMemoryKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
+ + blockBuffer.position(), getCellBufSize());
+ } else {
+ ret = new KeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
+ + blockBuffer.position(), getCellBufSize());
}
- return ret;
} else {
- NoTagsKeyValue ret = new NoTagsKeyValue(blockBuffer.array(),
- blockBuffer.arrayOffset() + blockBuffer.position(), getCellBufSize());
- if (this.reader.shouldIncludeMemstoreTS()) {
- ret.setSequenceId(currMemstoreTS);
+ if (this.curBlock.getMemoryType() == BlockCache.MemoryType.SHARED_MEM) {
+ ret = new SharedMemoryNoTagsKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
+ + blockBuffer.position(), getCellBufSize());
+ } else {
+ ret = new NoTagsKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
+ + blockBuffer.position(), getCellBufSize());
}
- return ret;
+ }
+ if (this.reader.shouldIncludeMemstoreTS()) {
+ ret.setSequenceId(currMemstoreTS);
+ }
+ return ret;
+ }
+
+ private static class SharedMemoryKeyValue extends KeyValue implements SharedMemoryBackedCell {
+ public SharedMemoryKeyValue(byte[] bytes, int offset, int length) {
+ super(bytes, offset, length);
+ }
+
+ @Override
+ public Cell copy() {
+ byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
+ return new KeyValue(copy, 0, copy.length);
+ }
+ }
+
+ private static class SharedMemoryNoTagsKeyValue extends NoTagsKeyValue implements
+ SharedMemoryBackedCell {
+ public SharedMemoryNoTagsKeyValue(byte[] bytes, int offset, int length) {
+ super(bytes, offset, length);
+ }
+
+ @Override
+ public Cell copy() {
+ byte[] copy = Bytes.copy(this.bytes, this.offset, this.length);
+ return new KeyValue(copy, 0, copy.length);
}
}
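
The two wrapper classes above make the ownership rule explicit: a cell decoded from a shared-memory backed block is only valid while that block is checked out of the cache. Below is a minimal consumer-side sketch of that contract, assuming only the SharedMemoryBackedCell.copy() method introduced in this patch; the class and method names are illustrative.

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.SharedMemoryBackedCell;

    public final class SharedCellUtil {
      private SharedCellUtil() {
      }

      /**
       * Deep-copies any shared-memory backed cells so the caller can keep them
       * after the backing blocks have been returned to the block cache.
       */
      public static List<Cell> copyOutSharedCells(List<Cell> cells) {
        List<Cell> safe = new ArrayList<Cell>(cells.size());
        for (Cell cell : cells) {
          if (cell instanceof SharedMemoryBackedCell) {
            // Copy the bytes out of the cached block's buffer before the block is released.
            safe.add(((SharedMemoryBackedCell) cell).copy());
          } else {
            safe.add(cell);
          }
        }
        return safe;
      }
    }

This mirrors what RegionScannerImpl.nextRaw() does later in this patch when copyCellsFromSharedMem is set.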
@@ -872,7 +974,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
}
protected void setNonSeekedState() {
- block = null;
+ reset();
blockBuffer = null;
currKeyLen = 0;
currValueLen = 0;
@@ -892,7 +994,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
+ "; currKeyLen = " + currKeyLen + "; currValLen = "
+ currValueLen + "; block limit = " + blockBuffer.limit()
+ "; HFile name = " + reader.getName()
- + "; currBlock currBlockOffset = " + block.getOffset());
+ + "; currBlock currBlockOffset = " + this.curBlock.getOffset());
throw e;
}
}
@@ -905,7 +1007,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
private boolean positionForNextBlock() throws IOException {
// Methods are small so they get inlined because they are 'hot'.
long lastDataBlockOffset = reader.getTrailer().getLastDataBlockOffset();
- if (block.getOffset() >= lastDataBlockOffset) {
+ if (this.curBlock.getOffset() >= lastDataBlockOffset) {
setNonSeekedState();
return false;
}
@@ -920,7 +1022,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
setNonSeekedState();
return false;
}
- updateCurrBlock(nextBlock);
+ updateCurrentBlock(nextBlock);
return true;
}
@@ -969,27 +1071,38 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
return false;
}
- long firstDataBlockOffset =
- reader.getTrailer().getFirstDataBlockOffset();
- if (block != null && block.getOffset() == firstDataBlockOffset) {
- blockBuffer.rewind();
- readKeyValueLen();
- return true;
+ long firstDataBlockOffset = reader.getTrailer().getFirstDataBlockOffset();
+ if (curBlock != null
+ && curBlock.getOffset() == firstDataBlockOffset) {
+ return processFirstDataBlock();
}
- block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
+ readAndUpdateNewBlock(firstDataBlockOffset);
+ return true;
+
+ }
+
+    protected boolean processFirstDataBlock() throws IOException {
+ blockBuffer.rewind();
+ readKeyValueLen();
+ return true;
+ }
+
+ protected void readAndUpdateNewBlock(long firstDataBlockOffset) throws IOException,
+ CorruptHFileException {
+ HFileBlock newBlock = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
- if (block.getOffset() < 0) {
- throw new IOException("Invalid block offset: " + block.getOffset());
+ if (newBlock.getOffset() < 0) {
+ throw new IOException("Invalid block offset: " + newBlock.getOffset());
}
- updateCurrBlock(block);
- return true;
+ updateCurrentBlock(newBlock);
}
-
+
protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
boolean rewind, Cell key, boolean seekBefore) throws IOException {
- if (block == null || block.getOffset() != seekToBlock.getOffset()) {
- updateCurrBlock(seekToBlock);
+ if (this.curBlock == null
+ || this.curBlock.getOffset() != seekToBlock.getOffset()) {
+ updateCurrentBlock(seekToBlock);
} else if (rewind) {
blockBuffer.rewind();
}
@@ -1012,10 +1125,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
*/
protected final void checkKeyValueLen() {
if (checkLen(this.currKeyLen) || checkLen(this.currValueLen)) {
- throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen +
- " or currValueLen " + this.currValueLen + ". Block offset: " + block.getOffset() +
- ", block length: " + this.blockBuffer.limit() + ", position: " +
- this.blockBuffer.position() + " (without header).");
+ throw new IllegalStateException("Invalid currKeyLen " + this.currKeyLen
+ + " or currValueLen " + this.currValueLen + ". Block offset: "
+ + this.curBlock.getOffset() + ", block length: "
+ + this.blockBuffer.limit() + ", position: " + this.blockBuffer.position()
+ + " (without header).");
}
}
@@ -1025,19 +1139,18 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
*
* @param newBlock the block to make current
*/
- protected void updateCurrBlock(HFileBlock newBlock) {
- block = newBlock;
-
+ protected void updateCurrentBlock(HFileBlock newBlock) throws IOException {
+ // Set the active block on the reader
// sanity check
- if (block.getBlockType() != BlockType.DATA) {
- throw new IllegalStateException("Scanner works only on data " +
- "blocks, got " + block.getBlockType() + "; " +
- "fileName=" + reader.getName() + ", " +
- "dataBlockEncoder=" + reader.getDataBlockEncoding() + ", " +
- "isCompaction=" + isCompaction);
+      if (newBlock.getBlockType() != BlockType.DATA) {
+        throw new IllegalStateException("Scanner works only on data blocks, got "
+            + newBlock.getBlockType() + "; fileName=" + reader.getName()
+            + ", dataBlockEncoder=" + reader.getDataBlockEncoding()
+            + ", isCompaction=" + isCompaction);
}
- blockBuffer = block.getBufferWithoutHeader();
+ addToCurrentBlock(newBlock);
+ blockBuffer = newBlock.getBufferWithoutHeader();
readKeyValueLen();
blockFetches++;
@@ -1077,6 +1190,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
new KeyValue.KeyOnlyKeyValue(blockBuffer.array(), blockBuffer.arrayOffset()
+ blockBuffer.position() + KEY_VALUE_LEN_SIZE, currKeyLen));
}
+
+ @Override
+ public void returnCurrentBatch() {
+ this.returnBlocks(false);
+ }
+
}
public Path getPath() {
@@ -1131,6 +1250,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
private HFileBlock getCachedBlock(BlockCacheKey cacheKey, boolean cacheBlock, boolean useLock,
boolean isCompaction, boolean updateCacheMetrics, BlockType expectedBlockType,
DataBlockEncoding expectedDataBlockEncoding) throws IOException {
+    // Ideally the scanner would work at this level, because this method knows exactly what was
+    // fetched from the cache and, based on that, could keep returning the previously read blocks.
+
// Check cache for block. If found return.
if (cacheConf.isBlockCacheEnabled()) {
BlockCache cache = cacheConf.getBlockCache();
@@ -1138,7 +1261,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
updateCacheMetrics);
if (cachedBlock != null) {
if (cacheConf.shouldCacheCompressed(cachedBlock.getBlockType().getCategory())) {
- cachedBlock = cachedBlock.unpack(hfileContext, fsBlockReader);
+ HFileBlock compressedBlock = cachedBlock;
+ cachedBlock = compressedBlock.unpack(hfileContext, fsBlockReader);
+          // If unpacking produced a new block, the compressed copy can be returned to the cache.
+ if (compressedBlock != cachedBlock) cache.returnBlock(cacheKey, compressedBlock);
}
validateBlockType(cachedBlock, expectedBlockType);
@@ -1174,6 +1300,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
" because of a data block encoding mismatch" +
"; expected: " + expectedDataBlockEncoding +
", actual: " + actualDataBlockEncoding);
+            // This is an error scenario, so decrement the reference count here.
+ cache.returnBlock(cacheKey, cachedBlock);
cache.evictBlock(cacheKey);
}
return null;
@@ -1191,7 +1320,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
* @throws IOException
*/
@Override
- public ByteBuffer getMetaBlock(String metaBlockName, boolean cacheBlock)
+ public HFileBlock getMetaBlock(String metaBlockName, boolean cacheBlock)
throws IOException {
if (trailer.getMetaIndexCount() == 0) {
return null; // there are no meta blocks
@@ -1223,7 +1352,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
assert cachedBlock.isUnpacked() : "Packed block leak.";
// Return a distinct 'shallow copy' of the block,
// so pos does not get messed by the scanner
- return cachedBlock.getBufferWithoutHeader();
+ return cachedBlock;
}
// Cache Miss, please load.
}
@@ -1237,7 +1366,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
cacheConf.isInMemory(), this.cacheConf.isCacheDataInL1());
}
- return metaBlock.getBufferWithoutHeader();
+ return metaBlock;
}
}
@@ -1434,7 +1563,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
@Override
public boolean isSeeked(){
- return this.block != null;
+ return curBlock != null;
+ }
+
+ public void setNonSeekedState() {
+ reset();
}
/**
@@ -1444,22 +1577,20 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
* @param newBlock the block to make current
* @throws CorruptHFileException
*/
- private void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
- block = newBlock;
-
+ @Override
+ protected void updateCurrentBlock(HFileBlock newBlock) throws CorruptHFileException {
// sanity checks
- if (block.getBlockType() != BlockType.ENCODED_DATA) {
- throw new IllegalStateException(
- "EncodedScanner works only on encoded data blocks");
+ if (newBlock.getBlockType() != BlockType.ENCODED_DATA) {
+ throw new IllegalStateException("EncodedScanner works only on encoded data blocks");
}
- short dataBlockEncoderId = block.getDataBlockEncodingId();
+ short dataBlockEncoderId = newBlock.getDataBlockEncodingId();
if (!DataBlockEncoding.isCorrectEncoder(dataBlockEncoder, dataBlockEncoderId)) {
String encoderCls = dataBlockEncoder.getClass().getName();
throw new CorruptHFileException("Encoder " + encoderCls
+ " doesn't support data block encoding "
+ DataBlockEncoding.getNameFromId(dataBlockEncoderId));
}
-
+ addToCurrentBlock(newBlock);
seeker.setCurrentBuffer(getEncodedBuffer(newBlock));
blockFetches++;
@@ -1476,31 +1607,10 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
DataBlockEncoding.ID_SIZE).slice();
return encodedBlock;
}
-
+
@Override
- public boolean seekTo() throws IOException {
- if (reader == null) {
- return false;
- }
-
- if (reader.getTrailer().getEntryCount() == 0) {
- // No data blocks.
- return false;
- }
-
- long firstDataBlockOffset =
- reader.getTrailer().getFirstDataBlockOffset();
- if (block != null && block.getOffset() == firstDataBlockOffset) {
- seeker.rewind();
- return true;
- }
-
- block = reader.readBlock(firstDataBlockOffset, -1, cacheBlocks, pread,
- isCompaction, true, BlockType.DATA, getEffectiveDataBlockEncoding());
- if (block.getOffset() < 0) {
- throw new IOException("Invalid block offset: " + block.getOffset());
- }
- updateCurrentBlock(block);
+ protected boolean processFirstDataBlock() throws IOException {
+ seeker.rewind();
return true;
}
@@ -1508,10 +1618,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
public boolean next() throws IOException {
boolean isValid = seeker.next();
if (!isValid) {
- block = readNextDataBlock();
- isValid = block != null;
+ HFileBlock newBlock = readNextDataBlock();
+ isValid = newBlock != null;
if (isValid) {
- updateCurrentBlock(block);
+ updateCurrentBlock(newBlock);
+ } else {
+ setNonSeekedState();
}
}
return isValid;
@@ -1531,7 +1643,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
@Override
public Cell getKeyValue() {
- if (block == null) {
+ if (this.curBlock == null) {
return null;
}
return seeker.getKeyValue();
@@ -1552,7 +1664,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
}
private void assertValidSeek() {
- if (block == null) {
+ if (this.curBlock == null) {
throw new NotSeekedException();
}
}
@@ -1561,9 +1673,11 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
return dataBlockEncoder.getFirstKeyInBlock(getEncodedBuffer(curBlock));
}
+ @Override
protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, Cell nextIndexedKey,
boolean rewind, Cell key, boolean seekBefore) throws IOException {
- if (block == null || block.getOffset() != seekToBlock.getOffset()) {
+ if (this.curBlock == null
+ || this.curBlock.getOffset() != seekToBlock.getOffset()) {
updateCurrentBlock(seekToBlock);
} else if (rewind) {
seeker.rewind();
@@ -1644,6 +1758,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
HFileContextBuilder builder = new HFileContextBuilder()
.withIncludesMvcc(shouldIncludeMemstoreTS())
.withHBaseCheckSum(true)
+ .withPathName(this.getName())
.withCompression(this.compressAlgo);
// Check for any key material available
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
index 2b6e011..e84738b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileScanner.java
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.regionserver.BatchReturnableScanner;
import org.apache.hadoop.hbase.Cell;
/**
@@ -37,7 +38,7 @@ import org.apache.hadoop.hbase.Cell;
* getValue.
*/
@InterfaceAudience.Private
-public interface HFileScanner {
+public interface HFileScanner extends BatchReturnableScanner {
/**
* SeekTo or just before the passed cell. Examine the return
* code to figure whether we found the cell or not.
@@ -149,4 +150,9 @@ public interface HFileScanner {
* @return the next key in the index (the key to seek to the next block)
*/
Cell getNextIndexedKey();
+
+ /**
+ * Does the required close operation on the current scanner
+ */
+ void close();
}
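
With HFileScanner now extending BatchReturnableScanner and exposing close(), callers are expected to hand blocks back after each batch and to close the scanner when done. A hedged sketch of that lifecycle against the interfaces as changed here; reader construction is elided and the class and method names are illustrative.

    import java.io.IOException;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.io.hfile.HFile;
    import org.apache.hadoop.hbase.io.hfile.HFileScanner;

    public class ScannerLifecycleSketch {
      /** Reads every cell in the file, returning cached blocks batch by batch. */
      static long drain(HFile.Reader reader) throws IOException {
        HFileScanner scanner = reader.getScanner(true, true); // cacheBlocks, pread
        long cells = 0;
        try {
          if (!scanner.seekTo()) {
            return 0; // empty file
          }
          do {
            Cell cell = scanner.getKeyValue();
            cells++;
            // ... consume the cell while its backing block is still checked out ...
          } while (scanner.next());
          // Hand the blocks referenced during this batch back to the block cache.
          scanner.returnCurrentBatch();
        } finally {
          // close() releases whatever the scanner still holds.
          scanner.close();
        }
        return cells;
      }
    }

RegionScannerImpl.returnCurrentBatch() in this patch delegates to the store and joined heaps, which eventually reach these HFileScanner methods.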
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
index 18dcbb0..961d7ae 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/LruBlockCache.java
@@ -34,13 +34,13 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
-import com.google.common.base.Objects;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType;
import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ClassSize;
@@ -49,6 +49,7 @@ import org.apache.hadoop.util.StringUtils;
import org.codehaus.jackson.annotate.JsonIgnoreProperties;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Objects;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
@@ -1090,4 +1091,11 @@ public class LruBlockCache implements ResizableBlockCache, HeapSize {
public BlockCache[] getBlockCaches() {
return null;
}
+
+ @Override
+ public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) {
+ assert block.getCacheType() == CacheType.L1_CACHED;
+ // Do nothing
+ return true;
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
index 57e7f28..e60711a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/MemcachedBlockCache.java
@@ -269,4 +269,10 @@ public class MemcachedBlockCache implements BlockCache {
}
}
+ @Override
+ public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) {
+ // Not handling reference counting
+ return true;
+ }
+
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index dfada87..9129474 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -43,6 +43,7 @@ import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
@@ -62,10 +63,12 @@ import org.apache.hadoop.hbase.io.hfile.CacheableDeserializer;
import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
import org.apache.hadoop.hbase.io.hfile.CachedBlock;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock.CacheType;
import org.apache.hadoop.hbase.util.ConcurrentIndex;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.HasThread;
import org.apache.hadoop.hbase.util.IdLock;
+import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.StringUtils;
import com.google.common.annotations.VisibleForTesting;
@@ -417,8 +420,9 @@ public class BucketCache implements BlockCache, HeapSize {
// existence here.
if (bucketEntry.equals(backingMap.get(key))) {
int len = bucketEntry.getLength();
- ByteBuffer bb = ByteBuffer.allocate(len);
- int lenRead = ioEngine.read(bb, bucketEntry.offset());
+      Pair<ByteBuffer, MemoryType> pair = ioEngine.read(bucketEntry.offset(), len);
+ ByteBuffer bb = pair.getFirst();
+ int lenRead = bb.limit();
if (lenRead != len) {
throw new RuntimeException("Only " + lenRead + " bytes read, " + len + " expected");
}
@@ -430,6 +434,15 @@ public class BucketCache implements BlockCache, HeapSize {
cacheStats.hit(caching);
cacheStats.ioHit(timeTaken);
}
+ if (cachedBlock instanceof HFileBlock) {
+ ((HFileBlock) cachedBlock).setCacheType(CacheType.L2_CACHED);
+ ((HFileBlock) cachedBlock).setMemoryType(pair.getSecond());
+          // Once we have HBASE-11425 we can increment/decrement the ref count
+ // only on the SHARED_MEM type
+ if (pair.getSecond() == MemoryType.SHARED_MEM) {
+ bucketEntry.refCount.incrementAndGet();
+ }
+ }
bucketEntry.access(accessCount.incrementAndGet());
if (this.ioErrorStartTime > 0) {
ioErrorStartTime = -1;
@@ -463,6 +476,9 @@ public class BucketCache implements BlockCache, HeapSize {
@Override
public boolean evictBlock(BlockCacheKey cacheKey) {
+ return evictBlock(cacheKey, false);
+ }
+ public boolean evictBlock(BlockCacheKey cacheKey, boolean checkInUse) {
if (!cacheEnabled) {
return false;
}
@@ -483,10 +499,30 @@ public class BucketCache implements BlockCache, HeapSize {
IdLock.Entry lockEntry = null;
try {
lockEntry = offsetLock.getLockEntry(bucketEntry.offset());
- if (backingMap.remove(cacheKey, bucketEntry)) {
- blockEvicted(cacheKey, bucketEntry, removedBlock == null);
+ int refCount = bucketEntry.refCount.get();
+ if(refCount == 0) {
+ if (backingMap.remove(cacheKey, bucketEntry)) {
+ blockEvicted(cacheKey, bucketEntry, removedBlock == null);
+ } else {
+ return false;
+ }
} else {
- return false;
+ if(checkInUse) {
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("This block " + cacheKey + " is still referred by " + refCount
+ + " readers. Can not be freed now");
+ }
+ return false;
+ } else {
+ if (LOG.isDebugEnabled()) {
+              LOG.debug("This block " + cacheKey + " is still referred by " + refCount
+                  + " readers. Cannot be freed now; marking it for eviction at a later point.");
+ }
+ bucketEntry.markedForEvict = true;
+ }
}
} catch (IOException ie) {
LOG.warn("Failed evicting block " + cacheKey);
@@ -1102,6 +1138,10 @@ public class BucketCache implements BlockCache, HeapSize {
byte deserialiserIndex;
private volatile long accessCounter;
private BlockPriority priority;
+ // Set this when we were not able to forcefully evict the block
+ private volatile boolean markedForEvict;
+ private AtomicInteger refCount = new AtomicInteger(0);
+
/**
* Time this block was cached. Presumes we are created just before we are added to the cache.
*/
@@ -1193,9 +1233,12 @@ public class BucketCache implements BlockCache, HeapSize {
public long free(long toFree) {
      Map.Entry<BlockCacheKey, BucketEntry> entry;
long freedBytes = 0;
+      // TODO avoid a cycling situation: if every block is still in use there is no way to free
+      // space. What to do then? Should the caching attempt fail? This may need changes in the
+      // cacheBlock API.
while ((entry = queue.pollLast()) != null) {
- evictBlock(entry.getKey());
- freedBytes += entry.getValue().getLength();
+ if (evictBlock(entry.getKey(), true)) {
+ freedBytes += entry.getValue().getLength();
+ }
if (freedBytes >= toFree) {
return freedBytes;
}
@@ -1399,4 +1442,31 @@ public class BucketCache implements BlockCache, HeapSize {
public BlockCache[] getBlockCaches() {
return null;
}
+
+ @Override
+ public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) {
+ assert block.getCacheType() == CacheType.L2_CACHED;
+    // When the block is served from a RAMQueueEntry, its CacheType is not yet set to L2; the
+    // block is still on its way into the L2 cache, so it is effectively NOT_FROM_CACHE.
+ BucketEntry bucketEntry = backingMap.get(cacheKey);
+ if (bucketEntry != null) {
+ if(block.getMemoryType() == MemoryType.SHARED_MEM) {
+ bucketEntry.refCount.decrementAndGet();
+ }
+ if(bucketEntry.markedForEvict) {
+ evictBlock(cacheKey);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ @VisibleForTesting
+ public int getRefCount(BlockCacheKey cacheKey) {
+ BucketEntry bucketEntry = backingMap.get(cacheKey);
+ if (bucketEntry != null) {
+ return bucketEntry.refCount.get();
+ }
+ return 0;
+ }
}
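
The BucketCache changes above give each shared-memory bucket entry a reference count: getBlock() increments it, returnBlock() decrements it, and eviction is deferred via markedForEvict while readers still hold the block. A hedged caller-side sketch of the get/return pairing this implies, using the existing BlockCache.getBlock signature and the returnBlock method added in this patch; class and method names are illustrative.

    import org.apache.hadoop.hbase.io.hfile.BlockCache;
    import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
    import org.apache.hadoop.hbase.io.hfile.Cacheable;
    import org.apache.hadoop.hbase.io.hfile.HFileBlock;

    public class RefCountedReadSketch {
      /** Fetches a block from the cache and always gives the reference back. */
      static void readCachedBlock(BlockCache cache, BlockCacheKey key) {
        Cacheable cached = cache.getBlock(key, true, false, true);
        if (!(cached instanceof HFileBlock)) {
          return; // miss, or not an HFileBlock
        }
        HFileBlock block = (HFileBlock) cached;
        try {
          // ... decode cells from the block while the ref count keeps it pinned ...
        } finally {
          // Decrements the bucket entry's ref count and, if the block was marked
          // for eviction while we held it, evicts it now.
          cache.returnBlock(key, block);
        }
      }
    }

The @VisibleForTesting getRefCount() added above lets tests assert that every checked-out block is eventually returned this way.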
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
index de10667..3feb7e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/ByteBufferIOEngine.java
@@ -22,7 +22,9 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
import org.apache.hadoop.hbase.util.ByteBufferArray;
+import org.apache.hadoop.hbase.util.Pair;
/**
* IO engine that stores data in memory using an array of ByteBuffers
@@ -72,10 +74,11 @@ public class ByteBufferIOEngine implements IOEngine {
* @throws IOException
*/
@Override
- public int read(ByteBuffer dstBuffer, long offset) throws IOException {
- assert dstBuffer.hasArray();
- return bufferArray.getMultiple(offset, dstBuffer.remaining(), dstBuffer.array(),
+  public Pair<ByteBuffer, MemoryType> read(long offset, int length) throws IOException {
+ ByteBuffer dstBuffer = ByteBuffer.allocate(length);
+ bufferArray.getMultiple(offset, dstBuffer.remaining(), dstBuffer.array(),
dstBuffer.arrayOffset());
+    return new Pair<ByteBuffer, MemoryType>(dstBuffer, MemoryType.SHARED_MEM);
}
/**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
index 7b6b25f..4e1f7ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/FileIOEngine.java
@@ -26,6 +26,8 @@ import java.nio.channels.FileChannel;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
+import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.util.StringUtils;
/**
@@ -85,8 +87,10 @@ public class FileIOEngine implements IOEngine {
* @throws IOException
*/
@Override
- public int read(ByteBuffer dstBuffer, long offset) throws IOException {
- return fileChannel.read(dstBuffer, offset);
+  public Pair<ByteBuffer, MemoryType> read(long offset, int length) throws IOException {
+ ByteBuffer dstBuffer = ByteBuffer.allocate(length);
+ fileChannel.read(dstBuffer, offset);
+    return new Pair<ByteBuffer, MemoryType>(dstBuffer, MemoryType.NON_SHARED_MEM);
}
/**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java
index 430c5af..61f479c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/IOEngine.java
@@ -22,6 +22,8 @@ import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
+import org.apache.hadoop.hbase.util.Pair;
/**
* A class implementing IOEngine interface supports data services for
@@ -35,13 +37,13 @@ public interface IOEngine {
boolean isPersistent();
/**
- * Transfers data from IOEngine to the given byte buffer
- * @param dstBuffer the given byte buffer into which bytes are to be written
+ * Transfers data from IOEngine to a byte buffer
* @param offset The offset in the IO engine where the first byte to be read
- * @return number of bytes read
+ * @param length How many bytes to be read from the offset
+ * @return Pair of ByteBuffer where data is read and its MemoryType
* @throws IOException
*/
- int read(ByteBuffer dstBuffer, long offset) throws IOException;
+  Pair<ByteBuffer, MemoryType> read(long offset, int length) throws IOException;
/**
* Transfers data from the given byte buffer to IOEngine
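
The read(offset, length) contract now returns the buffer together with a MemoryType, so a caller can tell whether it received a private copy (FileIOEngine) or shared memory (ByteBufferIOEngine) that must be reference counted. A minimal caller sketch against the interface as changed here; the class and method names are illustrative.

    import java.io.IOException;
    import java.nio.ByteBuffer;

    import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
    import org.apache.hadoop.hbase.io.hfile.bucket.IOEngine;
    import org.apache.hadoop.hbase.util.Pair;

    public class IOEngineReadSketch {
      /** Reads length bytes at offset and notes whether the buffer is shared memory. */
      static ByteBuffer readBlockBytes(IOEngine engine, long offset, int length) throws IOException {
        Pair<ByteBuffer, MemoryType> read = engine.read(offset, length);
        if (read.getSecond() == MemoryType.SHARED_MEM) {
          // The bytes still belong to the engine, so a block built on this buffer
          // must be reference counted before it is handed out.
        } else {
          // NON_SHARED_MEM: this is a private copy and can be used freely.
        }
        return read.getFirst();
      }
    }

BucketCache.getBlock() above branches on exactly this MemoryType when deciding whether to bump the entry's reference count.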
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java
index 26ffa95..89603a6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/CallRunner.java
@@ -16,16 +16,15 @@ package org.apache.hadoop.hbase.ipc;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+import java.nio.ByteBuffer;
import java.nio.channels.ClosedChannelException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.ipc.RpcServer.Call;
import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
-import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import org.apache.htrace.Trace;
@@ -40,8 +39,6 @@ import com.google.protobuf.Message;
*/
@InterfaceAudience.Private
public class CallRunner {
- private static final Log LOG = LogFactory.getLog(CallRunner.class);
-
private Call call;
private RpcServerInterface rpcServer;
private MonitoredRPCHandler status;
@@ -91,7 +88,7 @@ public class CallRunner {
}
Throwable errorThrowable = null;
String error = null;
-    Pair<Message, CellScanner> resultPair = null;
+    Triple<Message, CellScanner, ByteBuffer> result = null;
RpcServer.CurCall.set(call);
TraceScope traceScope = null;
try {
@@ -103,8 +100,8 @@ public class CallRunner {
traceScope = Trace.startSpan(call.toTraceString(), call.tinfo);
}
// make the call
- resultPair = this.rpcServer.call(call.service, call.md, call.param, call.cellScanner,
- call.timestamp, this.status);
+ result = this.rpcServer.call(call.service, call.md, call.param, call.cellScanner,
+ call.timestamp, this.status, call.connection.codec, call.connection.compressionCodec);
} catch (Throwable e) {
RpcServer.LOG.debug(Thread.currentThread().getName() + ": " + call.toShortString(), e);
errorThrowable = e;
@@ -117,7 +114,7 @@ public class CallRunner {
traceScope.close();
}
RpcServer.CurCall.set(null);
- if (resultPair != null) {
+ if (result != null) {
this.rpcServer.addCallSize(call.getSize() * -1);
sucessful = true;
}
@@ -125,9 +122,10 @@ public class CallRunner {
// Set the response for undelayed calls and delayed calls with
// undelayed responses.
if (!call.isDelayed() || !call.isReturnValueDelayed()) {
- Message param = resultPair != null ? resultPair.getFirst() : null;
- CellScanner cells = resultPair != null ? resultPair.getSecond() : null;
- call.setResponse(param, cells, errorThrowable, error);
+ Message param = result != null ? result.getFirst() : null;
+ CellScanner cells = result != null ? result.getSecond() : null;
+ ByteBuffer cellBlocks = result != null ? result.getThird() : null;
+ call.setResponse(param, cells, errorThrowable, error, cellBlocks);
}
call.sendResponseIfReady();
this.status.markComplete("Sent response");
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
index 3557768..dd01025 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServer.java
@@ -76,6 +76,7 @@ import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.ipc.IPCUtil;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Operation;
@@ -106,7 +107,7 @@ import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSecretManager;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Counter;
-import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
@@ -184,7 +185,6 @@ public class RpcServer implements RpcServerInterface {
private final int warnDelayedCalls;
private AtomicInteger delayedCalls;
- private final IPCUtil ipcUtil;
private static final String AUTH_FAILED_FOR = "Auth failed for ";
private static final String AUTH_SUCCESSFUL_FOR = "Auth successful for ";
@@ -274,7 +274,7 @@ public class RpcServer implements RpcServerInterface {
private UserProvider userProvider;
private final BoundedByteBufferPool reservoir;
-
+ private final IPCUtil ipcUtil;
/**
* Datastructure that holds all necessary to a method invocation and then afterward, carries
@@ -377,7 +377,7 @@ public class RpcServer implements RpcServerInterface {
}
protected synchronized void setResponse(Object m, final CellScanner cells,
- Throwable t, String errorMsg) {
+ Throwable t, String errorMsg, ByteBuffer cellBlocks) {
if (this.isError) return;
if (t != null) this.isError = true;
BufferChain bc = null;
@@ -403,12 +403,14 @@ public class RpcServer implements RpcServerInterface {
// Set the exception as the result of the method invocation.
headerBuilder.setException(exceptionBuilder.build());
}
- // Pass reservoir to buildCellBlock. Keep reference to returne so can add it back to the
+ // Pass reservoir to buildCellBlock. Keep reference to return so can add it back to the
// reservoir when finished. This is hacky and the hack is not contained but benefits are
// high when we can avoid a big buffer allocation on each rpc.
- this.cellBlock = ipcUtil.buildCellBlock(this.connection.codec,
- this.connection.compressionCodec, cells, reservoir);
+ // The cellblock would have been already formed for this connection
+ this.cellBlock = (cellBlocks != null) ? cellBlocks : ipcUtil.buildCellBlock(
+ this.connection.codec, this.connection.compressionCodec, cells, reservoir);
if (this.cellBlock != null) {
+ // Update the cellblock so that it is put back to the BoundedByteBufferPool
CellBlockMeta.Builder cellBlockBuilder = CellBlockMeta.newBuilder();
// Presumes the cellBlock bytebuffer has been flipped so limit has total size in it.
cellBlockBuilder.setLength(this.cellBlock.limit());
@@ -462,7 +464,7 @@ public class RpcServer implements RpcServerInterface {
this.delayResponse = false;
delayedCalls.decrementAndGet();
if (this.delayReturnValue) {
- this.setResponse(result, null, null, null);
+ this.setResponse(result, null, null, null, null);
}
this.responder.doRespond(this);
}
@@ -485,7 +487,7 @@ public class RpcServer implements RpcServerInterface {
@Override
public synchronized void endDelayThrowing(Throwable t) throws IOException {
- this.setResponse(null, null, t, StringUtils.stringifyException(t));
+ this.setResponse(null, null, t, StringUtils.stringifyException(t), null);
this.delayResponse = false;
this.sendResponseIfReady();
}
@@ -1208,11 +1210,11 @@ public class RpcServer implements RpcServerInterface {
/**
* Codec the client asked use.
*/
- private Codec codec;
+ protected Codec codec;
/**
* Compression codec the client asked us use.
*/
- private CompressionCodec compressionCodec;
+ protected CompressionCodec compressionCodec;
BlockingService service;
protected UserGroupInformation user = null;
private AuthMethod authMethod;
@@ -1449,7 +1451,7 @@ public class RpcServer implements RpcServerInterface {
WritableUtils.writeString(out, errorClass);
WritableUtils.writeString(out, error);
}
- saslCall.setSaslTokenResponse(saslResponse.getByteBuffer());
+ saslCall.setSaslTokenResponse(saslResponse.flipAndReturnBuffer());
saslCall.responder = responder;
saslCall.sendResponseIfReady();
} finally {
@@ -1816,8 +1818,8 @@ public class RpcServer implements RpcServerInterface {
offset += paramSize;
}
if (header.hasCellBlockMeta()) {
- cellScanner = ipcUtil.createCellScanner(this.codec, this.compressionCodec,
- buf, offset, buf.length);
+ cellScanner = ipcUtil.createCellScanner(this.codec, this.compressionCodec, buf, offset,
+ buf.length);
}
} catch (Throwable t) {
String msg = getListenerAddress() + " is unable to read call parameter from client " +
@@ -1989,7 +1991,6 @@ public class RpcServer implements RpcServerInterface {
this.warnDelayedCalls = conf.getInt(WARN_DELAYED_CALLS, DEFAULT_WARN_DELAYED_CALLS);
this.delayedCalls = new AtomicInteger(0);
- this.ipcUtil = new IPCUtil(conf);
// Create the responder here
@@ -2002,6 +2003,7 @@ public class RpcServer implements RpcServerInterface {
}
this.scheduler = scheduler;
this.scheduler.init(new RpcSchedulerContext(this));
+ ipcUtil = IPCUtil.createInstance(this.conf);
}
/**
@@ -2023,7 +2025,7 @@ public class RpcServer implements RpcServerInterface {
private void setupResponse(ByteArrayOutputStream response, Call call, Throwable t, String error)
throws IOException {
if (response != null) response.reset();
- call.setResponse(null, null, t, error);
+ call.setResponse(null, null, t, error, null);
}
protected void closeConnection(Connection connection) {
@@ -2100,10 +2102,23 @@ public class RpcServer implements RpcServerInterface {
* the return response has protobuf response payload. On failure, the
* exception name and the stack trace are returned in the protobuf response.
*/
+ // Used by tests only
+  protected Triple<Message, CellScanner, ByteBuffer> call(BlockingService service,
+ MethodDescriptor md, Message param, CellScanner cellScanner, long receiveTime,
+ MonitoredRPCHandler status) throws IOException {
+ return call(service, md, param, cellScanner, receiveTime, status, null, null);
+ }
+
+ /**
+ * This is a server side method, which is invoked over RPC. On success
+ * the return response has protobuf response payload. On failure, the
+ * exception name and the stack trace are returned in the protobuf response.
+ */
@Override
-  public Pair<Message, CellScanner> call(BlockingService service, MethodDescriptor md,
- Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status)
- throws IOException {
+  public Triple<Message, CellScanner, ByteBuffer> call(BlockingService service,
+ MethodDescriptor md, Message param, CellScanner cellScanner, long receiveTime,
+ MonitoredRPCHandler status, Codec codec, CompressionCodec compressionCodec)
+ throws IOException {
try {
status.setRPC(md.getName(), new Object[]{param}, receiveTime);
// TODO: Review after we add in encoded data blocks.
@@ -2112,6 +2127,11 @@ public class RpcServer implements RpcServerInterface {
//get an instance of the method arg type
long startTime = System.currentTimeMillis();
PayloadCarryingRpcController controller = new PayloadCarryingRpcController(cellScanner);
+ // see if we can go with constructors or setters
+ controller.setReservoir(reservoir);
+ controller.setCodec(codec);
+ controller.setCompressionCodec(compressionCodec);
+
Message result = service.callBlockingMethod(md, controller, param);
long endTime = System.currentTimeMillis();
int processingTime = (int) (endTime - startTime);
@@ -2141,7 +2161,8 @@ public class RpcServer implements RpcServerInterface {
status.getClient(), startTime, processingTime, qTime,
responseSize);
}
-      return new Pair<Message, CellScanner>(result, controller.cellScanner());
+      return new Triple<Message, CellScanner, ByteBuffer>(result, controller.cellScanner(),
+ controller.getCellBlock());
} catch (Throwable e) {
// The above callBlockingMethod will always return a SE. Strip the SE wrapper before
// putting it on the wire. Its needed to adhere to the pb Service Interface but we don't
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java
index ab8b485..af9ff8a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/ipc/RpcServerInterface.java
@@ -21,13 +21,17 @@ package org.apache.hadoop.hbase.ipc;
import java.io.IOException;
import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
+import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
+import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.security.authorize.PolicyProvider;
import com.google.common.annotations.VisibleForTesting;
@@ -48,9 +52,9 @@ public interface RpcServerInterface {
void setSocketSendBufSize(int size);
InetSocketAddress getListenerAddress();
-  Pair<Message, CellScanner> call(BlockingService service, MethodDescriptor md,
- Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status)
- throws IOException, ServiceException;
+  Triple<Message, CellScanner, ByteBuffer> call(BlockingService service, MethodDescriptor md, Message param,
+ CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status,
+ Codec codec, CompressionCodec compressionCodec) throws IOException, ServiceException;
void setErrorHandler(HBaseRPCErrorHandler handler);
HBaseRPCErrorHandler getErrorHandler();
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java
index 34c749e..55419d3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/DefaultOperationQuota.java
@@ -124,6 +124,11 @@ public class DefaultOperationQuota implements OperationQuota {
}
@Override
+ public void addOperationSize(OperationType type, long size) {
+ avgOpSize.addOperationSize(type, size);
+ }
+
+ @Override
public long getAvgOperationSize(OperationType type) {
return avgOpSize.getAvgOperationSize(type);
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java
index e67c7c0..82c6568 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/NoopOperationQuota.java
@@ -81,4 +81,9 @@ class NoopOperationQuota implements OperationQuota {
public long getAvgOperationSize(OperationType type) {
return -1;
}
+
+ @Override
+ public void addOperationSize(OperationType type, long size) {
+ // no-op
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java
index b885ac9..6b4ffb1 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/OperationQuota.java
@@ -125,4 +125,10 @@ public interface OperationQuota {
/** @return the average data size of the specified operation */
long getAvgOperationSize(OperationType type);
+
+ /**
+   * Add the size of a result returned by a scan/get. This is used to refine the quota
+   * calculation and to keep a better average short-read size for the next time.
+ */
+ public void addOperationSize(OperationType type, long size);
}
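
The new addOperationSize hook lets the caller report the real size of a completed get or scan so the quota's running averages improve over time. A hedged sketch of the reporting pattern, assuming the OperationType enum defined on OperationQuota; the class and method names are illustrative.

    import java.util.List;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValueUtil;
    import org.apache.hadoop.hbase.quotas.OperationQuota;
    import org.apache.hadoop.hbase.quotas.OperationQuota.OperationType;

    public class QuotaFeedbackSketch {
      /** Reports the observed size of a finished Get back to the quota. */
      static void reportGetSize(OperationQuota quota, List<Cell> results) {
        long size = 0;
        for (Cell cell : results) {
          size += KeyValueUtil.length(cell);
        }
        // Feeds the observed size into the per-type running average used for
        // the next availability estimate.
        quota.addOperationSize(OperationType.GET, size);
      }
    }

DefaultOperationQuota simply forwards this to its average-operation-size tracker, as shown in the hunk above.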
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BatchReturnableScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BatchReturnableScanner.java
new file mode 100644
index 0000000..09d80d1
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/BatchReturnableScanner.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+
+/**
+ * Denotes a scanner that can return the blocks used by the current batch
+ * of results back to the block cache.
+ */
+@InterfaceAudience.Private
+public interface BatchReturnableScanner {
+
+ /**
+ * Allows the blocks being used in the current batch to be returned
+ */
+ public void returnCurrentBatch();
+}
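
Not every scanner handed around the region server implements this interface, so callers can release blocks defensively. A small illustrative helper, assuming nothing beyond the interface above; the class and method names are hypothetical.

    import org.apache.hadoop.hbase.regionserver.BatchReturnableScanner;

    public final class BatchReturnSketch {
      private BatchReturnSketch() {
      }

      /**
       * Returns the blocks of the current batch if the scanner supports it;
       * a no-op for scanners that never pin cache blocks.
       */
      public static void returnBatchIfPossible(Object scanner) {
        if (scanner instanceof BatchReturnableScanner) {
          ((BatchReturnableScanner) scanner).returnCurrentBatch();
        }
      }
    }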
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/GetContext.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/GetContext.java
new file mode 100644
index 0000000..ba74b80
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/GetContext.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.classification.InterfaceAudience;
+import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
+/**
+ * A context object to be used with Gets.
+ */
+@InterfaceAudience.Private
+public class GetContext {
+ private PayloadCarryingRpcController rpcController;
+
+ public GetContext(PayloadCarryingRpcController rpcController) {
+ this.rpcController = rpcController;
+ }
+
+ public void setCellsCountInBlock(int numberOfCells) {
+ this.rpcController.setCellsCountInBlock(numberOfCells);
+ }
+
+ public int getCellsCountInBlock() {
+ return this.rpcController.getCellsCountInBlock();
+ }
+
+  public void setCellBlock(final ArrayList<Cell> cells, int totalCellsSize) throws IOException {
+ this.rpcController.setCellBlock(cells, totalCellsSize);
+ }
+
+}
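
GetContext is a thin adapter over the PayloadCarryingRpcController, letting the region hand back a Get result as a ready-to-send cell block. A hedged sketch of how a server-side caller might wire it up, using only the constructor and setters defined above; the class and method names are illustrative and the controller is assumed to come from the RPC layer.

    import java.io.IOException;
    import java.util.ArrayList;

    import org.apache.hadoop.hbase.Cell;
    import org.apache.hadoop.hbase.KeyValueUtil;
    import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController;
    import org.apache.hadoop.hbase.regionserver.GetContext;

    public class GetContextSketch {
      /** Packs the cells of a Get result into the controller's payload. */
      static void fillPayload(PayloadCarryingRpcController controller, ArrayList<Cell> results)
          throws IOException {
        GetContext ctxt = new GetContext(controller);
        ctxt.setCellsCountInBlock(results.size());
        int totalCellSize = 0;
        for (Cell cell : results) {
          totalCellSize += KeyValueUtil.length(cell);
        }
        // Builds the cell block that will travel back to the client as the RPC payload.
        ctxt.setCellBlock(results, totalCellSize);
      }
    }

HRegion.setResultInPayload() further down follows the same steps, skipping the cell block when the Get is existence-only.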
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index ced6ccb..64240c4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -34,6 +34,7 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
+import java.util.ListIterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
@@ -88,6 +89,7 @@ import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.NotServingRegionException;
import org.apache.hadoop.hbase.RegionTooBusyException;
+import org.apache.hadoop.hbase.SharedMemoryBackedCell;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
@@ -2385,7 +2387,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
//////////////////////////////////////////////////////////////////////////////
@Override
- public Result getClosestRowBefore(final byte [] row, final byte [] family) throws IOException {
+ public Result getClosestRowBefore(final byte[] row, final byte[] family) throws IOException {
if (coprocessorHost != null) {
Result result = new Result();
if (coprocessorHost.preGetClosestRowBefore(row, family, result)) {
@@ -2418,11 +2420,16 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
@Override
public RegionScanner getScanner(Scan scan) throws IOException {
- return getScanner(scan, null);
+ return getScanner(scan, null, false);
}
- protected RegionScanner getScanner(Scan scan,
-      List<KeyValueScanner> additionalScanners) throws IOException {
+ public RegionScanner getScanner(Scan scan, boolean isCellBlockSupported)
+ throws IOException {
+ return getScanner(scan, null, isCellBlockSupported);
+ }
+
+  protected RegionScanner getScanner(Scan scan, List<KeyValueScanner> additionalScanners,
+ boolean isCellBlockSupported) throws IOException {
startRegionOperation(Operation.SCAN);
try {
// Verify families are all valid
@@ -2436,21 +2443,21 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
checkFamily(family);
}
}
- return instantiateRegionScanner(scan, additionalScanners);
+ return instantiateRegionScanner(scan, additionalScanners, isCellBlockSupported);
} finally {
closeRegionOperation(Operation.SCAN);
}
}
protected RegionScanner instantiateRegionScanner(Scan scan,
-      List<KeyValueScanner> additionalScanners) throws IOException {
+      List<KeyValueScanner> additionalScanners, boolean isCellBlockSupported) throws IOException {
if (scan.isReversed()) {
if (scan.getFilter() != null) {
scan.getFilter().setReversed(true);
}
- return new ReversedRegionScannerImpl(scan, additionalScanners, this);
+ return new ReversedRegionScannerImpl(scan, additionalScanners, this, isCellBlockSupported);
}
- return new RegionScannerImpl(scan, additionalScanners, this);
+ return new RegionScannerImpl(scan, additionalScanners, this, isCellBlockSupported);
}
@Override
@@ -5189,7 +5196,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
/**
* RegionScannerImpl is used to combine scanners from multiple Stores (aka column families).
*/
- class RegionScannerImpl implements RegionScanner {
+ class RegionScannerImpl implements RegionScanner, BatchReturnableScanner {
// Package local for testability
KeyValueHeap storeHeap = null;
/** Heap of key-values that are not essential for the provided filters and are thus read
@@ -5205,6 +5212,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
protected final byte[] stopRow;
protected final HRegion region;
protected final CellComparator comparator;
+ protected boolean copyCellsFromSharedMem;
private final long readPt;
private final long maxResultSize;
@@ -5216,8 +5224,12 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
return region.getRegionInfo();
}
-    RegionScannerImpl(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region)
- throws IOException {
+ public void setCopyCellsFromSharedMem(boolean copyCells) {
+ this.copyCellsFromSharedMem = copyCells;
+ }
+
+    RegionScannerImpl(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region,
+ boolean isCellBlockSupported) throws IOException {
this.region = region;
this.maxResultSize = scan.getMaxResultSize();
if (scan.hasFilter()) {
@@ -5226,6 +5238,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
this.filter = null;
}
this.comparator = region.getCellCompartor();
+ this.copyCellsFromSharedMem = (isCellBlockSupported == false);
/**
* By default, calls to next/nextRaw must enforce the batch limit. Thus, construct a default
@@ -5301,6 +5314,14 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
return this.defaultScannerContext.getBatchLimit();
}
+ @Override
+ public void returnCurrentBatch() {
+ this.storeHeap.returnCurrentBatch();
+ if (joinedHeap != null) {
+ this.joinedHeap.returnCurrentBatch();
+ }
+ }
+
/**
* Reset both the filter and the old filter.
*
@@ -5348,24 +5369,50 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
// scanner is closed
throw new UnknownScannerException("Scanner was closed");
}
- boolean moreValues;
- if (outResults.isEmpty()) {
- // Usually outResults is empty. This is true when next is called
- // to handle scan or get operation.
- moreValues = nextInternal(outResults, scannerContext);
- } else {
-        List<Cell> tmpList = new ArrayList<Cell>();
- moreValues = nextInternal(tmpList, scannerContext);
- outResults.addAll(tmpList);
- }
+ boolean moreValues = false;
+ try {
+ if (outResults.isEmpty()) {
+ // Usually outResults is empty. This is true when next is called
+ // to handle scan or get operation.
+ moreValues = nextInternal(outResults, scannerContext);
+ } else {
+          List<Cell> tmpList = new ArrayList<Cell>();
+ moreValues = nextInternal(tmpList, scannerContext);
+ outResults.addAll(tmpList);
+ }
- // If the size limit was reached it means a partial Result is being returned. Returning a
- // partial Result means that we should not reset the filters; filters should only be reset in
- // between rows
- if (!scannerContext.partialResultFormed()) resetFilters();
+        // If the size limit was reached it means a partial Result is being returned. Returning
+        // a partial Result means that we should not reset the filters; filters should only be
+        // reset in between rows.
+ if (!scannerContext.partialResultFormed())
+ resetFilters();
- if (isFilterDoneInternal()) {
- moreValues = false;
+ if (isFilterDoneInternal()) {
+ moreValues = false;
+ }
+
+ // If copyCellsFromSharedMem = true, then we need to copy the cells. Otherwise nextRaw() is
+ // coming from RSRpcServices#scan() and we will make the cellBlock immediately and also do
+ // finalizeScan()
+ if (copyCellsFromSharedMem && !outResults.isEmpty()) {
+ // Do the copy of the results here.
+          ListIterator<Cell> listItr = outResults.listIterator();
+ Cell cell = null;
+ while (listItr.hasNext()) {
+ cell = listItr.next();
+ if (cell instanceof SharedMemoryBackedCell) {
+ listItr.set(((SharedMemoryBackedCell) cell).copy());
+ }
+ }
+ }
+ } finally {
+ if (copyCellsFromSharedMem) {
+ // When copyCellsFromSharedMem is true (the case where CPs wrap the scanner) we return
+ // the blocks right away.
+ this.returnCurrentBatch();
+ }
}
return moreValues;
}
@@ -5757,6 +5804,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
@Override
public synchronized void close() {
+ // The close would return all the blocks
if (storeHeap != null) {
storeHeap.close();
storeHeap = null;
@@ -6366,6 +6414,53 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
@Override
public Result get(final Get get) throws IOException {
+ prepareGet(get);
+ List<Cell> results = get(get, true);
+ boolean stale = this.getRegionInfo().getReplicaId() != 0;
+ return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale);
+ }
+
+ /**
+ * Do a get based on the get parameter. When this API is used, the row data
+ * is sent back via the controller's payload carried in the given GetContext.
+ *
+ * @param get
+ * @param getCtxt
+ * @throws IOException
+ */
+ public void get(final Get get, final GetContext getCtxt)
+ throws IOException {
+ prepareGet(get);
+ RegionScanner scanner = null;
+ try {
+ ArrayList<Cell> results = new ArrayList<Cell>();
+ if (coprocessorHost != null) {
+ if (coprocessorHost.preGet(get, results)) {
+ // pre hook is bypassing actual get() operation
+ setResultInPayload(results, getCtxt, get.isCheckExistenceOnly());
+ return;
+ }
+ }
+
+ Scan scan = new Scan(get);
+ scanner = getScanner(scan, true);
+ scanner.next(results);
+
+ // post-get CP hook
+ if (coprocessorHost != null) {
+ coprocessorHost.postGet(get, results);
+ }
+ // do after lock
+ metricsUpdate(results); // TODO: iterates over the cells List twice. Avoid.
+ setResultInPayload(results, getCtxt, get.isCheckExistenceOnly());
+ } finally {
+ if (scanner != null) {
+ scanner.close();
+ }
+ }
+ }
+
+ private void prepareGet(final Get get) throws IOException, NoSuchColumnFamilyException {
checkRow(get.getRow(), "Get");
// Verify families are all valid
if (get.hasFamilies()) {
@@ -6377,21 +6472,35 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
get.addFamily(family);
}
}
- List<Cell> results = get(get, true);
- boolean stale = this.getRegionInfo().getReplicaId() != 0;
- return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale);
+ }
+
+ private void setResultInPayload(ArrayList<Cell> results, GetContext ctxt,
+ boolean existsOnly) throws IOException {
+ // We return the result not as a CellScanner but as a cellblock ByteBuffer that is ready
+ // to be sent as the response to the client. Only then can we allow the L2 cache block
+ // memory to be evicted.
+ ctxt.setCellsCountInBlock(results.size());
+ if (!existsOnly) {
+ int totalCellSize = 0;
+ for (int i = 0; i < results.size(); i++) {
+ totalCellSize += KeyValueUtil.length(results.get(i));
+ // TODO: no need to consider tags length? Maybe better to use the Encoder way to find
+ // the size.
+ }
+ ctxt.setCellBlock(results, totalCellSize);
+ }
}
@Override
public List<Cell> get(Get get, boolean withCoprocessor) throws IOException {
-
List<Cell> results = new ArrayList<Cell>();
// pre-get CP hook
if (withCoprocessor && (coprocessorHost != null)) {
- if (coprocessorHost.preGet(get, results)) {
- return results;
- }
+ if (coprocessorHost.preGet(get, results)) {
+ return results;
+ }
}
Scan scan = new Scan(get);
@@ -6409,17 +6518,22 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
if (withCoprocessor && (coprocessorHost != null)) {
coprocessorHost.postGet(get, results);
}
-
// do after lock
+ metricsUpdate(results);
+
+ return results;
+ }
+
+ private void metricsUpdate(List<Cell> results) {
if (this.metricsRegion != null) {
long totalSize = 0L;
for (Cell cell : results) {
+ // This should give an estimate of the size of each cell in the result. Why do we need
+ // to know how the codec serializes it??
totalSize += CellUtil.estimatedSerializedSizeOf(cell);
}
this.metricsRegion.updateGet(totalSize);
}
-
- return results;
}
public void mutateRow(RowMutations rm) throws IOException {
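
For readers skimming the HRegion changes above, the copy-on-return pattern that RegionScannerImpl#nextRaw() now applies can be summarized with a minimal, self-contained sketch. The Cell and SharedMemoryBackedCell types below are simplified stand-ins introduced for illustration, not the actual HBase classes.

import java.util.List;
import java.util.ListIterator;

final class CopyOnReturnSketch {

  // Simplified stand-in for org.apache.hadoop.hbase.Cell.
  interface Cell { }

  // Simplified stand-in for the patch's SharedMemoryBackedCell: a cell whose backing
  // memory belongs to a cache block and must be copied before that block is returned.
  interface SharedMemoryBackedCell extends Cell {
    Cell copy(); // deep copy into memory the scanner does not own
  }

  // Replace every shared-memory-backed cell in-place with a copy so that
  // returnCurrentBatch() can safely release the underlying cache blocks.
  static void copySharedCells(List<Cell> results) {
    ListIterator<Cell> it = results.listIterator();
    while (it.hasNext()) {
      Cell cell = it.next();
      if (cell instanceof SharedMemoryBackedCell) {
        it.set(((SharedMemoryBackedCell) cell).copy());
      }
    }
  }
}
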
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
index 9220d07..ca8bd3c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java
@@ -20,9 +20,13 @@
package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
+import java.util.Set;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
@@ -45,6 +49,10 @@ import org.apache.hadoop.hbase.regionserver.ScannerContext.NextState;
public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
implements KeyValueScanner, InternalScanner {
protected PriorityQueue<KeyValueScanner> heap = null;
+ // Holds the scanners whenever an eager close() happens. All such eagerly closed
+ // scanners are collected here, and the actual close is performed when the final
+ // scanner.close() happens.
+ protected Set<KeyValueScanner> scannersForDelayedClose = new HashSet<KeyValueScanner>();
/**
* The current sub-scanner, i.e. the one that contains the next key/value
@@ -87,7 +95,7 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
if (scanner.peek() != null) {
this.heap.add(scanner);
} else {
- scanner.close();
+ this.scannersForDelayedClose.add(scanner);
}
}
this.current = pollRealKV();
@@ -108,7 +116,8 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
Cell kvReturn = this.current.next();
Cell kvNext = this.current.peek();
if (kvNext == null) {
- this.current.close();
+ // Add the current scanner here instead of closing it.
+ this.scannersForDelayedClose.add(this.current);
this.current = pollRealKV();
} else {
KeyValueScanner topScanner = this.heap.peek();
@@ -154,7 +163,8 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
*/
if (pee == null || !moreCells) {
- this.current.close();
+ // add the scanner that is to be closed
+ this.scannersForDelayedClose.add(this.current);
} else {
this.heap.add(this.current);
}
@@ -210,6 +220,10 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
}
public void close() {
+ for (KeyValueScanner scanner : this.scannersForDelayedClose) {
+ scanner.close();
+ }
+ this.scannersForDelayedClose.clear();
if (this.current != null) {
this.current.close();
}
@@ -311,7 +325,7 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
}
if (!seekResult) {
- scanner.close();
+ this.scannersForDelayedClose.add(scanner);
} else {
heap.add(scanner);
}
@@ -364,12 +378,12 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
} else {
// Close the scanner because we did a real seek and found out there
// are no more KVs.
- kvScanner.close();
+ this.scannersForDelayedClose.add(kvScanner);
}
} else {
// Close the scanner because it has already run out of KVs even before
// we had to do a real seek on it.
- kvScanner.close();
+ this.scannersForDelayedClose.add(kvScanner);
}
kvScanner = heap.poll();
}
@@ -398,4 +412,31 @@ public class KeyValueHeap extends NonReversedNonLazyKeyValueScanner
// here we return the next index key from the top scanner
return current == null ? null : current.getNextIndexedKey();
}
+
+ @Override
+ public void returnCurrentBatch() {
+ if(current != null) {
+ if(current instanceof BatchReturnableScanner) {
+ ((BatchReturnableScanner) current).returnCurrentBatch();
+ }
+ }
+ if(heap != null) {
+ Iterator<KeyValueScanner> iterator = heap.iterator();
+ while (iterator.hasNext()) {
+ KeyValueScanner scanner = iterator.next();
+ if (scanner instanceof BatchReturnableScanner) {
+ ((BatchReturnableScanner) scanner).returnCurrentBatch();
+ }
+ }
+ }
+ if (scannersForDelayedClose != null) {
+ Iterator<KeyValueScanner> iterator = scannersForDelayedClose.iterator();
+ while (iterator.hasNext()) {
+ KeyValueScanner scanner = iterator.next();
+ if (scanner instanceof BatchReturnableScanner) {
+ ((BatchReturnableScanner) scanner).returnCurrentBatch();
+ }
+ }
+ }
+ }
}
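
The delayed-close bookkeeping that KeyValueHeap introduces above boils down to the following self-contained sketch, assuming a simplified scanner interface (KVScanner is a stand-in, not the HBase KeyValueScanner).

import java.util.HashSet;
import java.util.Set;

final class DelayedCloseSketch {

  // Simplified stand-in for a scanner whose close() releases cache blocks.
  interface KVScanner {
    void close();
  }

  private final Set<KVScanner> scannersForDelayedClose = new HashSet<KVScanner>();

  // Called wherever the heap used to call scanner.close() eagerly: the scanner is
  // parked instead, so any blocks its cells still reference stay valid.
  void retire(KVScanner exhaustedScanner) {
    scannersForDelayedClose.add(exhaustedScanner);
  }

  // The real close happens once, when the heap itself is closed.
  void close() {
    for (KVScanner scanner : scannersForDelayedClose) {
      scanner.close();
    }
    scannersForDelayedClose.clear();
  }
}
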
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
index 76a9d0f..56a2f85 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
@@ -29,7 +29,7 @@ import org.apache.hadoop.hbase.client.Scan;
* Scanner that returns the next KeyValue.
*/
@InterfaceAudience.Private
-public interface KeyValueScanner {
+public interface KeyValueScanner extends BatchReturnableScanner {
/**
* Look at the next Cell in this scanner, but do not iterate scanner.
* @return the next Cell
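
KeyValueScanner now extends the new BatchReturnableScanner interface (a new file in this patch, not shown here). Inferred from its call sites, it is presumably just a single callback along these lines; this is an assumption for illustration, not the actual file contents.

// Presumed shape of BatchReturnableScanner, inferred from how it is used in this
// patch (an assumption; the real new file may differ in naming and javadoc).
interface BatchReturnableScanner {
  // Return (un-pin) any cache blocks backing the cells handed out in the current
  // batch, once the caller has finished serializing or copying them.
  void returnCurrentBatch();
}
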
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java
index 957f417..da8024a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java
@@ -71,4 +71,8 @@ public abstract class NonLazyKeyValueScanner implements KeyValueScanner {
public Cell getNextIndexedKey() {
return null;
}
+
+ @Override
+ public void returnCurrentBatch() {
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index aedd351..580e11b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -153,7 +153,9 @@ import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.FlushDescriptor;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.RegionEventDescriptor;
import org.apache.hadoop.hbase.quotas.OperationQuota;
+import org.apache.hadoop.hbase.quotas.OperationQuota.OperationType;
import org.apache.hadoop.hbase.quotas.RegionServerQuotaManager;
+import org.apache.hadoop.hbase.regionserver.HRegion.RegionScannerImpl;
import org.apache.hadoop.hbase.regionserver.Leases.LeaseStillHeldException;
import org.apache.hadoop.hbase.regionserver.Region.FlushResult;
import org.apache.hadoop.hbase.regionserver.Region.Operation;
@@ -287,7 +289,6 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
if (region != null && region.getCoprocessorHost() != null) {
region.getCoprocessorHost().preScannerClose(s);
}
-
s.close();
if (region != null && region.getCoprocessorHost() != null) {
region.getCoprocessorHost().postScannerClose(s);
@@ -364,8 +365,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
return context != null && context.isClientCellBlockSupport();
}
- private void addResult(final MutateResponse.Builder builder,
- final Result result, final PayloadCarryingRpcController rpcc) {
+ private void addResult(final MutateResponse.Builder builder, final Result result,
+ final PayloadCarryingRpcController rpcc) throws ServiceException {
if (result == null) return;
if (isClientCellBlockSupport()) {
builder.setResult(ProtobufUtil.toResultNoData(result));
@@ -377,16 +378,18 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
}
private void addResults(final ScanResponse.Builder builder, final List<Result> results,
- final RpcController controller, boolean isDefaultRegion) {
+ final RpcController controller, boolean isDefaultRegion) throws IOException {
builder.setStale(!isDefaultRegion);
if (results == null || results.isEmpty()) return;
- if (isClientCellBlockSupport()) {
+ // TODO: we can avoid calling isClientCellBlockSupport() every time.
+ if (isClientCellBlockSupport() && controller instanceof PayloadCarryingRpcController) {
for (Result res : results) {
builder.addCellsPerResult(res.size());
builder.addPartialFlagPerResult(res.isPartial());
}
- ((PayloadCarryingRpcController)controller).
- setCellScanner(CellUtil.createCellScanner(results));
+ // Set cellBlock as the payload.
+ ((PayloadCarryingRpcController) controller).setCellBlock(CellUtil.createCellScanner(results));
} else {
for (Result res: results) {
ClientProtos.Result pbr = ProtobufUtil.toResult(res);
@@ -562,11 +565,13 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
* @param builder
* @param cellsToReturn Could be null. May be allocated in this method. This is what this
* method returns as a 'result'.
+ * @param controller
* @return Return the cellScanner passed
*/
private List<CellScannable> doNonAtomicRegionMutation(final Region region,
final OperationQuota quota, final RegionAction actions, final CellScanner cellScanner,
- final RegionActionResult.Builder builder, List<CellScannable> cellsToReturn, long nonceGroup) {
+ final RegionActionResult.Builder builder, List<CellScannable> cellsToReturn, long nonceGroup,
+ PayloadCarryingRpcController controller) {
// Gather up CONTIGUOUS Puts and Deletes in this mutations List. Idea is that rather than do
// one at a time, we instead pass them in batch. Be aware that the corresponding
// ResultOrException instance that matches each Put or Delete is then added down in the
@@ -578,6 +583,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
Result r = null;
if (action.hasGet()) {
Get get = ProtobufUtil.toGet(action.getGet());
+ // TODO: handle creating a cellblock for multi() as well
r = region.get(get);
} else if (action.hasServiceCall()) {
resultOrExceptionBuilder = ResultOrException.newBuilder();
@@ -1873,6 +1879,9 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
}
byte[] row = get.getRow().toByteArray();
byte[] family = get.getColumn(0).getFamily().toByteArray();
+ // Always go with the PB mode here and not with the payload mode, because we need the
+ // result in the postGetClosestRowBefore hook, which we cannot get with a payload based
+ // result.
r = region.getClosestRowBefore(row, family);
} else {
Get clientGet = ProtobufUtil.toGet(get);
@@ -1880,9 +1889,22 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
existence = region.getCoprocessorHost().preExists(clientGet);
}
if (existence == null) {
- r = region.get(clientGet);
+ GetContext getCtxt = null;
+ if (controller instanceof PayloadCarryingRpcController && isClientCellBlockSupport()) {
+ getCtxt = new GetContext(
+ (PayloadCarryingRpcController) controller);
+ ((HRegion) region).get(clientGet, getCtxt);
+ } else {
+ r = region.get(clientGet);
+ }
if (get.getExistenceOnly()) {
- boolean exists = r.getExists();
+ boolean exists = false;
+ if (r == null && getCtxt != null) {
+ exists =
+ getCtxt.getCellsCountInBlock() != 0;
+ } else {
+ exists = r.getExists();
+ }
if (region.getCoprocessorHost() != null) {
exists = region.getCoprocessorHost().postExists(clientGet, exists);
}
@@ -1890,16 +1912,32 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
}
}
}
- if (existence != null){
- ClientProtos.Result pbr =
- ProtobufUtil.toResult(existence, region.getRegionInfo().getReplicaId() != 0);
+ if (existence != null) {
+ ClientProtos.Result pbr = ProtobufUtil.toResult(existence, region.getRegionInfo()
+ .getReplicaId() != 0);
builder.setResult(pbr);
} else if (r != null) {
+ // don't copy the results here because the copy has already been done
ClientProtos.Result pbr = ProtobufUtil.toResult(r);
builder.setResult(pbr);
+ } else {
+ ClientProtos.Result.Builder pbr = ClientProtos.Result.newBuilder();
+ pbr.setAssociatedCellCount(((PayloadCarryingRpcController) controller)
+ .getCellsCountInBlock());
+ boolean stale = region.getRegionInfo().getReplicaId() != 0;
+ pbr.setStale(stale);
+ builder.setResult(pbr);
}
+
if (r != null) {
quota.addGetResult(r);
+ } else {
+ // When result is null, the result cell bytes are set as payload in controller
+ if (controller != null
+ && ((PayloadCarryingRpcController) controller).getCellBlock() != null) {
+ quota.addOperationSize(OperationType.GET, ((PayloadCarryingRpcController) controller)
+ .getCellBlock().limit());
+ }
}
return builder.build();
} catch (IOException ie) {
@@ -1935,7 +1973,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
// It is also the conduit via which we pass back data.
PayloadCarryingRpcController controller = (PayloadCarryingRpcController)rpcc;
CellScanner cellScanner = controller != null ? controller.cellScanner(): null;
- if (controller != null) controller.setCellScanner(null);
+ if (controller != null) controller.clearPayload();
long nonceGroup = request.hasNonceGroup() ? request.getNonceGroup() : HConstants.NO_NONCE;
@@ -1992,7 +2030,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
} else {
// doNonAtomicRegionMutation manages the exception internally
cellsToReturn = doNonAtomicRegionMutation(region, quota, regionAction, cellScanner,
- regionActionResultBuilder, cellsToReturn, nonceGroup);
+ regionActionResultBuilder, cellsToReturn, nonceGroup, controller);
}
responseBuilder.addRegionActionResult(regionActionResultBuilder.build());
quota.close();
@@ -2021,7 +2059,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
CellScanner cellScanner = controller != null? controller.cellScanner(): null;
OperationQuota quota = null;
// Clear scanner so we are not holding on to reference across call.
- if (controller != null) controller.setCellScanner(null);
+ if (controller != null) controller.clearPayload();
try {
checkOpen();
requestCount.increment();
@@ -2174,6 +2212,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
boolean moreResults = true;
boolean closeScanner = false;
boolean isSmallScan = false;
+ RegionScanner actualRegionScanner = null;
ScanResponse.Builder builder = ScanResponse.newBuilder();
if (request.hasCloseScanner()) {
closeScanner = request.getCloseScanner();
@@ -2218,11 +2257,29 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
scanner = region.getCoprocessorHost().preScannerOpen(scan);
}
if (scanner == null) {
- scanner = region.getScanner(scan);
+ if (controller instanceof PayloadCarryingRpcController && isClientCellBlockSupport()) {
+ // the client supports cellblocks, so indicate that cells need not be copied from shared memory
+ scanner = ((HRegion) region)
+ .getScanner(scan, true);
+ } else {
+ scanner = region.getScanner(scan);
+ }
}
+ actualRegionScanner = scanner;
+ // TODO: why is the post hook called even when the pre hook returned a RegionScanner? Fix
if (region.getCoprocessorHost() != null) {
scanner = region.getCoprocessorHost().postScannerOpen(scan, scanner);
}
+ if (actualRegionScanner != scanner && !(scanner instanceof BatchReturnableScanner)) {
+ // It means the RegionScanner has been wrapped
+ if (actualRegionScanner instanceof RegionScannerImpl) {
+ // Copy the results when nextRaw is called from the CP so that the CP gets a cloned
+ // version of the results and need not bother about eviction. Ugly, yes. If the CP
+ // implementation does not delegate to the actual scanner this does not help.
+ ((RegionScannerImpl) actualRegionScanner).setCopyCellsFromSharedMem(true);
+ }
+ }
scannerId = addScanner(scanner, region);
scannerName = String.valueOf(scannerId);
ttl = this.scannerLeaseTimeoutPeriod;
@@ -2445,7 +2502,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
moreResults = false;
results = null;
} else {
- addResults(builder, results, controller, RegionReplicaUtil.isDefaultReplica(region.getRegionInfo()));
+ addResults(builder, results, controller,
+ RegionReplicaUtil.isDefaultReplica(region.getRegionInfo()));
}
} catch (IOException e) {
// if we have an exception on scanner next and we are using the callSeq
@@ -2456,6 +2514,11 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
}
throw e;
} finally {
+ // Clear the block references here; the next nextRaw() call will pick up new blocks.
+ if (scanner instanceof BatchReturnableScanner) {
+ ((BatchReturnableScanner) scanner).returnCurrentBatch();
+ }
// We're done. On way out re-add the above removed lease.
// Adding resets expiration time on lease.
if (scanners.containsKey(scannerName)) {
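
The server-side contract the RSRpcServices changes follow is: serialize the results into the response cellblock first, then return the blocks in a finally clause. A minimal sketch of that shape, using simplified stand-in types rather than the HBase ones:

import java.util.List;

final class ScanResponseSketch {

  // Simplified stand-in for a region scanner that pins cache blocks per batch.
  interface BatchScanner<R> {
    boolean nextRaw(List<R> out);   // fills 'out' with the next batch of results
    void returnCurrentBatch();      // releases the cache blocks pinned by that batch
  }

  static <R> boolean scanOneBatch(BatchScanner<R> scanner, List<R> results) {
    try {
      boolean more = scanner.nextRaw(results);
      // ... serialize 'results' into the response cellblock here ...
      return more;
    } finally {
      // Safe only after serialization: the response no longer references the
      // shared block memory, so the blocks may now be evicted.
      scanner.returnCurrentBatch();
    }
  }
}
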
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java
index 5167b4e..6914132 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedKeyValueHeap.java
@@ -85,7 +85,7 @@ public class ReversedKeyValueHeap extends KeyValueHeap {
}
if (!scanner.seekToPreviousRow(seekKey)) {
- scanner.close();
+ this.scannersForDelayedClose.add(scanner);
} else {
heap.add(scanner);
}
@@ -114,7 +114,7 @@ public class ReversedKeyValueHeap extends KeyValueHeap {
return current != null;
}
if (!scanner.backwardSeek(seekKey)) {
- scanner.close();
+ this.scannersForDelayedClose.add(scanner);
} else {
heap.add(scanner);
}
@@ -134,7 +134,7 @@ public class ReversedKeyValueHeap extends KeyValueHeap {
if (this.current.seekToPreviousRow(kvReturn)) {
this.heap.add(this.current);
} else {
- this.current.close();
+ this.scannersForDelayedClose.add(this.current);
}
this.current = pollRealKV();
} else {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java
index d7ef6e8..ab23b65 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ReversedRegionScannerImpl.java
@@ -39,12 +39,13 @@ class ReversedRegionScannerImpl extends RegionScannerImpl {
* @param scan
* @param additionalScanners
* @param region
+ * @param isCellBlockSupported
* @throws IOException
*/
ReversedRegionScannerImpl(Scan scan,
- List<KeyValueScanner> additionalScanners, HRegion region)
+ List<KeyValueScanner> additionalScanners, HRegion region, boolean isCellBlockSupported)
throws IOException {
- region.super(scan, additionalScanners, region);
+ region.super(scan, additionalScanners, region, isCellBlockSupported);
}
@Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
index eba3689..cb372e3 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
@@ -58,6 +58,7 @@ import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.WritableUtils;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
@@ -1049,6 +1050,7 @@ public class StoreFile {
private long deleteFamilyCnt = -1;
private boolean bulkLoadResult = false;
private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null;
+ private CacheConfig cacheConf;
public Reader(FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf)
throws IOException {
@@ -1058,6 +1060,7 @@ public class StoreFile {
public Reader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size,
CacheConfig cacheConf, Configuration conf) throws IOException {
+ this.cacheConf = cacheConf;
reader = HFile.createReader(fs, path, in, size, cacheConf, conf);
bloomFilterType = BloomType.NONE;
}
@@ -1284,7 +1287,7 @@ public class StoreFile {
// Empty file
if (reader.getTrailer().getEntryCount() == 0)
return false;
-
+ HFileBlock bloomBlock = null;
try {
boolean shouldCheckBloom;
ByteBuffer bloom;
@@ -1292,8 +1295,8 @@ public class StoreFile {
bloom = null;
shouldCheckBloom = true;
} else {
- bloom = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY,
- true);
+ bloomBlock = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, true);
+ bloom = bloomBlock.getBufferWithoutHeader();
shouldCheckBloom = bloom != null;
}
@@ -1345,8 +1348,9 @@ public class StoreFile {
} catch (IllegalArgumentException e) {
LOG.error("Bad bloom filter data -- proceeding without", e);
setGeneralBloomFilterFaulty();
+ } finally {
+ reader.returnBlock(bloomBlock);
}
-
return true;
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
index 42a378d..dff9471 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
@@ -27,8 +27,6 @@ import java.util.List;
import java.util.SortedSet;
import java.util.concurrent.atomic.AtomicLong;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
@@ -46,7 +44,6 @@ import org.apache.hadoop.hbase.regionserver.StoreFile.Reader;
*/
@InterfaceAudience.LimitedPrivate("Coprocessor")
public class StoreFileScanner implements KeyValueScanner {
- private static final Log LOG = LogFactory.getLog(HStore.class);
// the reader it comes from:
private final StoreFile.Reader reader;
@@ -158,7 +155,7 @@ public class StoreFileScanner implements KeyValueScanner {
try {
try {
if(!seekAtOrAfter(hfs, key)) {
- close();
+ cur = null;
return false;
}
@@ -185,7 +182,7 @@ public class StoreFileScanner implements KeyValueScanner {
try {
try {
if (!reseekAtOrAfter(hfs, key)) {
- close();
+ cur = null;
return false;
}
setCurrentCell(hfs.getKeyValue());
@@ -229,7 +226,6 @@ public class StoreFileScanner implements KeyValueScanner {
}
if (cur == null) {
- close();
return false;
}
@@ -237,8 +233,8 @@ public class StoreFileScanner implements KeyValueScanner {
}
public void close() {
- // Nothing to close on HFileScanner?
cur = null;
+ this.hfs.close();
}
/**
@@ -429,7 +425,7 @@ public class StoreFileScanner implements KeyValueScanner {
key.getRowLength());
if (seekCount != null) seekCount.incrementAndGet();
if (!hfs.seekBefore(seekKey)) {
- close();
+ this.cur = null;
return false;
}
KeyValue firstKeyOfPreviousRow = KeyValueUtil.createFirstOnRow(hfs.getKeyValue()
@@ -437,7 +433,7 @@ public class StoreFileScanner implements KeyValueScanner {
if (seekCount != null) seekCount.incrementAndGet();
if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) {
- close();
+ this.cur = null;
return false;
}
@@ -494,4 +490,9 @@ public class StoreFileScanner implements KeyValueScanner {
public Cell getNextIndexedKey() {
return hfs.getNextIndexedKey();
}
+
+ @Override
+ public void returnCurrentBatch() {
+ this.hfs.returnCurrentBatch();
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
index cbca57b..3a32151 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@@ -22,8 +22,12 @@ package org.apache.hadoop.hbase.regionserver;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
import java.util.List;
import java.util.NavigableSet;
+import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.locks.ReentrantLock;
@@ -53,8 +57,8 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
* into List for a single row.
*/
@InterfaceAudience.Private
-public class StoreScanner extends NonReversedNonLazyKeyValueScanner
- implements KeyValueScanner, InternalScanner, ChangedReadersObserver {
+public class StoreScanner extends NonReversedNonLazyKeyValueScanner implements KeyValueScanner,
+ InternalScanner, ChangedReadersObserver {
private static final Log LOG = LogFactory.getLog(StoreScanner.class);
protected Store store;
protected ScanQueryMatcher matcher;
@@ -84,6 +88,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
protected final long maxRowSize;
protected final long cellsPerHeartbeatCheck;
+ // Collects all the KeyValueHeaps that get eagerly closed during the
+ // course of a scan
+ protected Set<KeyValueHeap> heapsForDelayedClose = new HashSet<KeyValueHeap>();
+
/**
* The number of KVs seen by the scanner. Includes explicitly skipped KVs, but not
* KVs skipped via seeking to next row/column. TODO: estimate them?
@@ -371,9 +379,8 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
}
}
- protected void resetKVHeap(List<? extends KeyValueScanner> scanners,
- CellComparator comparator) throws IOException {
- // Combine all seeked scanners with a heap
+ protected void resetKVHeap(List<? extends KeyValueScanner> scanners, CellComparator comparator)
+ throws IOException {
heap = new KeyValueHeap(scanners, comparator);
}
@@ -437,17 +444,38 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
@Override
public void close() {
+ close(true);
+ }
+
+ protected void close(boolean heapClose) {
lock.lock();
try {
- if (this.closing) return;
- this.closing = true;
- // under test, we dont have a this.store
- if (this.store != null)
- this.store.deleteChangedReaderObserver(this);
- if (this.heap != null)
- this.heap.close();
- this.heap = null; // CLOSED!
- this.lastTop = null; // If both are null, we are closed.
+ if (this.closing) {
+ if(heapClose) {
+ returnCurrentBatch();
+ }
+ return;
+ }
+ this.closing = true;
+ // under test, we dont have a this.store
+ if (this.store != null)
+ this.store.deleteChangedReaderObserver(this);
+ if (heapClose) {
+ for (KeyValueHeap h : this.heapsForDelayedClose) {
+ h.close();
+ }
+ this.heapsForDelayedClose.clear();
+ if (this.heap != null) {
+ this.heap.close();
+ this.heap = null; // CLOSED!
+ }
+ } else {
+ if (this.heap != null) {
+ this.heapsForDelayedClose.add(this.heap);
+ this.heap = null;
+ }
+ }
+ this.lastTop = null; // If both are null, we are closed.
} finally {
lock.unlock();
}
@@ -491,13 +519,16 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
// if the heap was left null, then the scanners had previously run out anyways, close and
// return.
if (this.heap == null) {
- close();
+ // By this time the partial close should have happened because the heap is already null
+ close(false);
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
Cell peeked = this.heap.peek();
if (peeked == null) {
- close();
+ // Here we know for sure that the heap is not going to give us any more results. We mark
+ // the heap for close, but cannot return its blocks yet.
+ close(false);
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
@@ -537,7 +568,6 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
if (prevCell != cell) ++kvsScanned; // Do object compare - we set prevKV from the same heap.
checkScanOrder(prevCell, cell, comparator);
prevCell = cell;
-
ScanQueryMatcher.MatchCode qcode = matcher.match(cell);
qcode = optimize(qcode, cell);
switch(qcode) {
@@ -604,7 +634,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
case DONE_SCAN:
- close();
+ close(false);
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
case SEEK_NEXT_ROW:
@@ -645,7 +675,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
}
// No more keys
- close();
+ close(false);
return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
} finally {
lock.unlock();
@@ -704,8 +734,8 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
//DebugPrint.println("SS updateReaders, topKey = " + lastTop);
- // close scanners to old obsolete Store files
- this.heap.close(); // bubble thru and close all scanners.
+ // park the heap so that its scanners are closed later (delayed close)
+ this.heapsForDelayedClose.add(this.heap);
this.heap = null; // the re-seeks could be slow (access HDFS) free up memory ASAP
// Let the next() call handle re-creating and seeking
@@ -884,5 +914,21 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
public Cell getNextIndexedKey() {
return this.heap.getNextIndexedKey();
}
+
+ @Override
+ public void returnCurrentBatch() {
+ if (heap != null) {
+ this.heap.returnCurrentBatch();
+ }
+ if (heapsForDelayedClose != null) {
+ Iterator<KeyValueHeap> iterator = heapsForDelayedClose.iterator();
+ while (iterator.hasNext()) {
+ KeyValueHeap scanner = iterator.next();
+ if (scanner != null && scanner instanceof BatchReturnableScanner) {
+ ((BatchReturnableScanner) scanner).returnCurrentBatch();
+ }
+ }
+ }
+ }
}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
index 984742f..8b05d8f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java
@@ -120,6 +120,8 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase
result = BloomFilterUtil.contains(key, keyOffset, keyLength,
bloomBuf, bloomBlock.headerSize(),
bloomBlock.getUncompressedSizeWithoutHeader(), hash, hashCount);
+ // After use, return the block if it was served from a cache.
+ reader.returnBlock(bloomBlock);
}
if (numQueriesPerChunk != null && block >= 0) {
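
The CompoundBloomFilter change above follows the same use-then-return discipline for ref-counted blocks. A minimal sketch of the pattern, with BloomBlock and BlockSource as simplified stand-ins for the HBase reader types:

final class BloomCheckSketch {

  // Simplified stand-in for a bloom chunk served from a ref-counted cache.
  interface BloomBlock {
    boolean mightContain(byte[] key, int offset, int length);
  }

  // Simplified stand-in for the reader that loads and returns blocks.
  interface BlockSource {
    BloomBlock loadBloomBlock(long blockOffset);
    void returnBlock(BloomBlock block); // decrements the block's reference count
  }

  static boolean contains(BlockSource source, long blockOffset, byte[] key) {
    BloomBlock block = source.loadBloomBlock(blockOffset);
    try {
      return block.mightContain(key, 0, key.length);
    } finally {
      // Mirrors reader.returnBlock(bloomBlock) in the hunk above: return the block
      // as soon as the membership check is done.
      source.returnBlock(block);
    }
  }
}
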
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
index f376e3d..0145cec 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java
@@ -1646,6 +1646,24 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility {
return (HTable) getConnection().getTable(tableName);
}
+ public HTable createTable(TableName tableName, byte[][] families,
+ int numVersions, int blockSize, String cpName) throws IOException {
+ HTableDescriptor desc = new HTableDescriptor(tableName);
+ for (byte[] family : families) {
+ HColumnDescriptor hcd = new HColumnDescriptor(family)
+ .setMaxVersions(numVersions)
+ .setBlocksize(blockSize);
+ desc.addFamily(hcd);
+ }
+ if(cpName != null) {
+ desc.addCoprocessor(cpName);
+ }
+ getHBaseAdmin().createTable(desc);
+ // HBaseAdmin only waits for regions to appear in hbase:meta; wait until they are assigned
+ waitUntilAllRegionsAssigned(tableName);
+ return (HTable) getConnection().getTable(tableName);
+ }
+
/**
* Create a table.
* @param tableName
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java
index 5c0284f..4f549e4 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestAcidGuarantees.java
@@ -90,6 +90,10 @@ public class TestAcidGuarantees implements Tool {
// prevent aggressive region split
conf.set(HConstants.HBASE_REGION_SPLIT_POLICY_KEY,
ConstantSizeRegionSplitPolicy.class.getName());
+ conf.setInt("hbase.bucketcache.size", 400);
+ conf.setStrings("hbase.bucketcache.ioengine", "heap");
+ conf.setFloat("hfile.block.cache.size", 0.2f);
+ conf.setFloat("hbase.regionserver.global.memstore.size", 0.1f);
util = new HBaseTestingUtility(conf);
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java
new file mode 100644
index 0000000..4cfab35
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestBlockEvictionFromClient.java
@@ -0,0 +1,1320 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
+import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
+import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint;
+import org.apache.hadoop.hbase.coprocessor.ObserverContext;
+import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.CachedBlock;
+import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache;
+import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.Region;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.regionserver.ScannerContext;
+import org.apache.hadoop.hbase.regionserver.Store;
+import org.apache.hadoop.hbase.testclassification.ClientTests;
+import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.After;
+import org.junit.AfterClass;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ LargeTests.class, ClientTests.class })
+@SuppressWarnings("deprecation")
+public class TestBlockEvictionFromClient {
+ private static final Log LOG = LogFactory.getLog(TestBlockEvictionFromClient.class);
+ protected final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+ static byte[][] ROWS = new byte[2][];
+ private static int NO_OF_THREADS = 3;
+ private static byte[] ROW = Bytes.toBytes("testRow");
+ private static byte[] ROW1 = Bytes.toBytes("testRow1");
+ private static byte[] FAMILY = Bytes.toBytes("testFamily");
+ private static byte[][] FAMILIES_1 = new byte[1][0];
+ private static byte[] QUALIFIER = Bytes.toBytes("testQualifier");
+ private static byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ private static byte[] data = new byte[1000];
+ private static byte[] data2 = Bytes.add(data, data);
+ protected static int SLAVES = 1;
+ private static CountDownLatch latch;
+ private static CountDownLatch getLatch;
+ private static CountDownLatch compactionLatch;
+ private static CountDownLatch exceptionLatch;
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @BeforeClass
+ public static void setUpBeforeClass() throws Exception {
+ ROWS[0] = ROW;
+ ROWS[1] = ROW1;
+ Configuration conf = TEST_UTIL.getConfiguration();
+ conf.setStrings(CoprocessorHost.REGION_COPROCESSOR_CONF_KEY,
+ MultiRowMutationEndpoint.class.getName());
+ conf.setBoolean("hbase.table.sanity.checks", true); // enable for below
+ // tests
+ conf.setInt("hbase.regionserver.handler.count", 20);
+ conf.setInt("hbase.bucketcache.size", 400);
+ conf.setStrings("hbase.bucketcache.ioengine", "heap");
+ conf.setFloat("hfile.block.cache.size", 0.2f);
+ conf.setFloat("hbase.regionserver.global.memstore.size", 0.1f);
+ conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
+ conf.setInt(HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, 5000);
+ FAMILIES_1[0] = FAMILY;
+ TEST_UTIL.startMiniCluster(SLAVES);
+ }
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @AfterClass
+ public static void tearDownAfterClass() throws Exception {
+ TEST_UTIL.shutdownMiniCluster();
+ }
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @Before
+ public void setUp() throws Exception {
+ CustomInnerRegionObserver.waitForGets.set(false);
+ CustomInnerRegionObserver.countOfNext.set(0);
+ CustomInnerRegionObserver.countOfGets.set(0);
+ }
+
+ /**
+ * @throws java.lang.Exception
+ */
+ @After
+ public void tearDown() throws Exception {
+ if (latch != null) {
+ while (latch.getCount() > 0) {
+ latch.countDown();
+ }
+ }
+ if (getLatch != null) {
+ getLatch.countDown();
+ }
+ if (compactionLatch != null) {
+ compactionLatch.countDown();
+ }
+ if (exceptionLatch != null) {
+ exceptionLatch.countDown();
+ }
+ latch = null;
+ getLatch = null;
+ compactionLatch = null;
+ exceptionLatch = null;
+ CustomInnerRegionObserver.throwException.set(false);
+ // Clean up the tables for every test case
+ TableName[] listTableNames = TEST_UTIL.getHBaseAdmin().listTableNames();
+ for (TableName tableName : listTableNames) {
+ if (!tableName.isSystemTable()) {
+ TEST_UTIL.getHBaseAdmin().disableTable(tableName);
+ TEST_UTIL.getHBaseAdmin().deleteTable(tableName);
+ }
+ }
+ }
+
+ @Test
+ public void testBlockEvictionWithParallelScans() throws Exception {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testBlockEvictionWithParallelScans");
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserver.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ Store store = region.getStores().iterator().next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ BlockCache cache = cacheConf.getBlockCache();
+
+ // insert data. 2 Rows are added
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data));
+ // data was in memstore so don't expect any changes
+ // flush the data
+ System.out.println("Flushing cache in problematic area");
+ // Should create one Hfile with 2 blocks
+ region.flush(true);
+ // Load cache
+ // Create three sets of scan
+ ScanThread[] scanThreads = initiateScan(table, false);
+ Thread.sleep(100);
+ checkForBlockEviction(cache, false, false, false);
+ for (ScanThread thread : scanThreads) {
+ thread.join();
+ }
+ // CustomInnerRegionObserver.sleepTime.set(0);
+ Iterator<CachedBlock> iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ // read the data and expect same blocks, one new hit, no misses
+ assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data));
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ // Check how this miss is happening
+ // insert a second column, read the row, no new blocks, 3 new hits
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ byte[] data2 = Bytes.add(data, data);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ Result r = table.get(new Get(ROW));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2));
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ // flush, one new block
+ System.out.println("Flushing cache");
+ region.flush(true);
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ // compact, net minus two blocks, two hits, no misses
+ System.out.println("Compacting");
+ assertEquals(2, store.getStorefilesCount());
+ store.triggerMajorCompaction();
+ region.compact(true);
+ waitForStoreFileCount(store, 1, 10000); // wait 10 seconds max
+ assertEquals(1, store.getStorefilesCount());
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ // read the row, this should be a cache miss because we don't cache data
+ // blocks on compaction
+ r = table.get(new Get(ROW));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2));
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ @Test
+ public void testParallelGetsAndScans() throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(2);
+ // Check if get() returns blocks on its close() itself
+ getLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testParallelGetsAndScans");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserver.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ Store store = region.getStores().iterator().next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ BlockCache cache = cacheConf.getBlockCache();
+
+ insertData(table);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ region.flush(true);
+ // Create three sets of scan
+ CustomInnerRegionObserver.waitForGets.set(true);
+ ScanThread[] scanThreads = initiateScan(table, false);
+ // Create three sets of gets
+ GetThread[] getThreads = initiateGet(table, false, false);
+ checkForBlockEviction(cache, false, false, false);
+ CustomInnerRegionObserver.waitForGets.set(false);
+ checkForBlockEviction(cache, false, false, false);
+ for (GetThread thread : getThreads) {
+ thread.join();
+ }
+ // Verify whether the gets have returned the blocks that it had
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // giving some time for the block to be decremented
+ checkForBlockEviction(cache, true, false, false);
+ getLatch.countDown();
+ for (ScanThread thread : scanThreads) {
+ thread.join();
+ }
+ System.out.println("Scans should have returned the bloks");
+ // Check with either true or false
+ CustomInnerRegionObserver.waitForGets.set(false);
+ // The scan should also have released the blocks by now
+ checkForBlockEviction(cache, true, true, false);
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ @Test
+ public void testGetWithCellsInDifferentFiles() throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ // Check if get() returns blocks on its close() itself
+ getLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testGetWithCellsInDifferentFiles");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserver.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ Store store = region.getStores().iterator().next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ BlockCache cache = cacheConf.getBlockCache();
+
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ region.flush(true);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // Create three sets of gets
+ GetThread[] getThreads = initiateGet(table, false, false);
+ Thread.sleep(200);
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ for (GetThread thread : getThreads) {
+ thread.join();
+ }
+ // Verify whether the gets have returned the blocks that it had
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // giving some time for the block to be decremented
+ checkForBlockEviction(cache, true, false, false);
+ getLatch.countDown();
+ System.out.println("Gets should have returned the bloks");
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ @Test
+ // TODO : check how block index works here
+ public void testGetsWithMultiColumnsAndExplicitTracker() throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ // Check if get() returns blocks on its close() itself
+ getLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testGetsWithMultiColumnsAndExplicitTracker");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserver.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ BlockCache cache = setCacheProperties(region);
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ for (int i = 1; i < 10; i++) {
+ put = new Put(ROW);
+ put.add(FAMILY, Bytes.toBytes("testQualifier" + i), data2);
+ table.put(put);
+ if (i % 2 == 0) {
+ region.flush(true);
+ }
+ }
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ region.flush(true);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // Create three sets of gets
+ GetThread[] getThreads = initiateGet(table, true, false);
+ Thread.sleep(200);
+ Iterator<CachedBlock> iterator = cache.iterator();
+ boolean usedBlocksFound = false;
+ int refCount = 0;
+ int noOfBlocksWithRef = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ System.out.println("The refCount is " + refCount);
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ noOfBlocksWithRef++;
+ }
+ }
+ assertTrue(usedBlocksFound);
+ // the number of blocks referred
+ assertEquals(10, noOfBlocksWithRef);
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ for (GetThread thread : getThreads) {
+ thread.join();
+ }
+ // Verify whether the gets have returned the blocks that it had
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // giving some time for the block to be decremented
+ checkForBlockEviction(cache, true, false, false);
+ getLatch.countDown();
+ System.out.println("Gets should have returned the bloks");
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ @Test
+ public void testGetWithMultipleColumnFamilies() throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ // Check if get() returns blocks on its close() itself
+ getLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testGetWithMultipleColumnFamilies");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ byte[][] fams = new byte[10][];
+ fams[0] = FAMILY;
+ for (int i = 1; i < 10; i++) {
+ fams[i] = (Bytes.toBytes("testFamily" + i));
+ }
+ table = TEST_UTIL.createTable(tableName, fams, 1, 1024,
+ CustomInnerRegionObserver.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ BlockCache cache = setCacheProperties(region);
+
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ for (int i = 1; i < 10; i++) {
+ put = new Put(ROW);
+ put.add(Bytes.toBytes("testFamily" + i), Bytes.toBytes("testQualifier" + i), data2);
+ table.put(put);
+ if (i % 2 == 0) {
+ region.flush(true);
+ }
+ }
+ region.flush(true);
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ region.flush(true);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // Create three sets of gets
+ GetThread[] getThreads = initiateGet(table, true, true);
+ Thread.sleep(200);
+ Iterator<CachedBlock> iterator = cache.iterator();
+ boolean usedBlocksFound = false;
+ int refCount = 0;
+ int noOfBlocksWithRef = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ System.out.println("The refCount is " + refCount);
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ noOfBlocksWithRef++;
+ }
+ }
+ assertTrue(usedBlocksFound);
+ // the number of blocks referred
+ assertEquals(3, noOfBlocksWithRef);
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ for (GetThread thread : getThreads) {
+ thread.join();
+ }
+ // Verify whether the gets have returned the blocks that it had
+ CustomInnerRegionObserver.waitForGets.set(true);
+ // giving some time for the block to be decremented
+ checkForBlockEviction(cache, true, false, false);
+ getLatch.countDown();
+ System.out.println("Gets should have returned the bloks");
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ //@Test
+ public void testScanWithMultipleColumnFamilies() throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ // Check if get() returns blocks on its close() itself
+ TableName tableName = TableName.valueOf("testScanWithMultipleColumnFamilies");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ byte[][] fams = new byte[10][];
+ fams[0] = FAMILY;
+ for (int i = 1; i < 10; i++) {
+ fams[i] = (Bytes.toBytes("testFamily" + i));
+ }
+ table = TEST_UTIL.createTable(tableName, fams, 1, 1024,
+ CustomInnerRegionObserver.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ BlockCache cache = setCacheProperties(region);
+
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ region.flush(true);
+ for (int i = 1; i < 10; i++) {
+ put = new Put(ROW);
+ put.add(Bytes.toBytes("testFamily" + i), Bytes.toBytes("testQualifier" + i), data2);
+ table.put(put);
+ if (i % 2 == 0) {
+ region.flush(true);
+ }
+ }
+ region.flush(true);
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ region.flush(true);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ // Create three sets of gets
+ ScanThread[] scanThreads = initiateScan(table, true);
+ Thread.sleep(200);
+ Iterator<CachedBlock> iterator = cache.iterator();
+ boolean usedBlocksFound = false;
+ int refCount = 0;
+ int noOfBlocksWithRef = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ System.out.println("The refCount is " + refCount);
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ noOfBlocksWithRef++;
+ }
+ }
+ assertTrue(usedBlocksFound);
+ // the number of blocks referred
+ assertEquals(12, noOfBlocksWithRef);
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ for (ScanThread thread : scanThreads) {
+ thread.join();
+ }
+ // giving some time for the block to be decremented
+ checkForBlockEviction(cache, true, false, false);
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
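+ // Helper: enable cache-on-write and evict-on-close on every store's CacheConfig
+ // and return the block cache taken from the last store seen.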
+ private BlockCache setCacheProperties(Region region) {
+ Iterator<Store> strItr = region.getStores().iterator();
+ BlockCache cache = null;
+ while (strItr.hasNext()) {
+ Store store = strItr.next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ // Use the last one
+ cache = cacheConf.getBlockCache();
+ }
+ return cache;
+ }
+
+ @Test
+ public void testParallelGetsAndScanWithWrappedRegionScanner() throws IOException,
+ InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(2);
+ // Check if get() returns blocks on its close() itself
+ getLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testParallelGetsAndScanWithWrappedRegionScanner");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserverWrapper.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ Store store = region.getStores().iterator().next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ BlockCache cache = cacheConf.getBlockCache();
+
+ // insert data. 2 Rows are added
+ insertData(table);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ region.flush(true);
+ // CustomInnerRegionObserver.sleepTime.set(5000);
+ // Create three sets of scan
+ CustomInnerRegionObserver.waitForGets.set(true);
+ ScanThread[] scanThreads = initiateScan(table, false);
+ // Create three sets of gets
+ GetThread[] getThreads = initiateGet(table, false, false);
+ // For the scan case the block would already have been decremented, because
+ // the scanner was wrapped before even the postNext hook gets executed.
+ // giving some time for the block to be decremented
+ Thread.sleep(100);
+ CustomInnerRegionObserver.waitForGets.set(false);
+ checkForBlockEviction(cache, false, false, true);
+ // countdown the latch
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ for (GetThread thread : getThreads) {
+ thread.join();
+ }
+ getLatch.countDown();
+ for (ScanThread thread : scanThreads) {
+ thread.join();
+ }
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ @Test
+ public void testScanWithCompaction() throws IOException, InterruptedException {
+ testScanWithCompactionInternals("testScanWithCompaction", false);
+ }
+
+ @Test
+ public void testReverseScanWithCompaction() throws IOException, InterruptedException {
+ testScanWithCompactionInternals("testReverseScanWithCompaction", true);
+ }
+
+ private void testScanWithCompactionInternals(String tableNameStr, boolean reversed)
+ throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ compactionLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf(tableNameStr);
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserverWrapper.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ Store store = region.getStores().iterator().next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ BlockCache cache = cacheConf.getBlockCache();
+
+ // insert data. 2 Rows are added
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ assertTrue(Bytes.equals(table.get(new Get(ROW)).value(), data));
+ // Should create one Hfile with 2 blocks
+ region.flush(true);
+ // read the data and expect same blocks, one new hit, no misses
+ int refCount = 0;
+ // Check how this miss is happening
+ // insert a second column, read the row, no new blocks, 3 new hits
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ byte[] data2 = Bytes.add(data, data);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ // flush, one new block
+ System.out.println("Flushing cache");
+ region.flush(true);
+ Iterator<CachedBlock> iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ // Create three sets of scan
+ ScanThread[] scanThreads = initiateScan(table, reversed);
+ Thread.sleep(100);
+ iterator = cache.iterator();
+ boolean usedBlocksFound = false;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ }
+ }
+ assertTrue("Blocks with non zero ref count should be found ", usedBlocksFound);
+ usedBlocksFound = false;
+ System.out.println("Compacting");
+ assertEquals(2, store.getStorefilesCount());
+ store.triggerMajorCompaction();
+ region.compact(true);
+ waitForStoreFileCount(store, 1, 10000); // wait 10 seconds max
+ assertEquals(1, store.getStorefilesCount());
+ // Even after compaction is done we will have some blocks that cannot be
+ // evicted, because the scan is still referencing them.
+ iterator = cache.iterator();
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3 as they are not yet cleared
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ }
+ }
+ assertTrue("Blocks with non zero ref count should be found ", usedBlocksFound);
+ // Should not throw exception
+ compactionLatch.countDown();
+ latch.countDown();
+ for (ScanThread thread : scanThreads) {
+ thread.join();
+ }
+ // by this time all blocks should have been evicted
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ Result r = table.get(new Get(ROW));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2));
+ // The gets would be working on new blocks
+ iterator = cache.iterator();
+ iterateBlockCache(cache, iterator);
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
+ @Test
+ public void testScanWithException() throws IOException, InterruptedException {
+ HTable table = null;
+ try {
+ latch = new CountDownLatch(1);
+ exceptionLatch = new CountDownLatch(1);
+ TableName tableName = TableName.valueOf("testScanWithException");
+ // Create KV that will give you two blocks
+ // Create a table with block size as 1024
+ table = TEST_UTIL.createTable(tableName, FAMILIES_1, 1, 1024,
+ CustomInnerRegionObserverWrapper.class.getName());
+ // get the block cache and region
+ String regionName = table.getRegionLocations().firstKey().getEncodedName();
+ Region region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getFromOnlineRegions(
+ regionName);
+ Store store = region.getStores().iterator().next();
+ CacheConfig cacheConf = store.getCacheConfig();
+ cacheConf.setCacheDataOnWrite(true);
+ cacheConf.setEvictOnClose(true);
+ BlockCache cache = cacheConf.getBlockCache();
+ // insert data. 2 Rows are added
+ insertData(table);
+ // flush the data
+ System.out.println("Flushing cache");
+ // Should create one Hfile with 2 blocks
+ region.flush(true);
+ // CustomInnerRegionObserver.sleepTime.set(5000);
+ CustomInnerRegionObserver.throwException.set(true);
+ ScanThread[] scanThreads = initiateScan(table, false);
+ // For the scan case the block would already have been decremented, because
+ // the scanner was wrapped before even the postNext hook gets executed.
+ // giving some time for the block to be decremented
+ Thread.sleep(100);
+ Iterator<CachedBlock> iterator = cache.iterator();
+ boolean usedBlocksFound = false;
+ int refCount = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ }
+ }
+ assertTrue(usedBlocksFound);
+ exceptionLatch.countDown();
+ // countdown the latch
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ for (ScanThread thread : scanThreads) {
+ thread.join();
+ }
+ iterator = cache.iterator();
+ usedBlocksFound = false;
+ refCount = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ }
+ }
+ assertTrue(usedBlocksFound);
+ // Sleep till the scan lease would expire? Can we reduce this value?
+ Thread.sleep(5100);
+ iterator = cache.iterator();
+ usedBlocksFound = false;
+ refCount = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ System.out.println("The ref count is "+refCount);
+ if (refCount != 0) {
+ // Blocks will be with count 3
+ assertEquals(NO_OF_THREADS, refCount);
+ usedBlocksFound = true;
+ }
+ }
+ assertFalse(usedBlocksFound);
+ } finally {
+ if (table != null) {
+ table.close();
+ }
+ }
+ }
+
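+ // Helper: walk the block cache and assert a zero ref count for every cached
+ // block (caches that do not track ref counts are skipped).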
+ private void iterateBlockCache(BlockCache cache, Iterator<CachedBlock> iterator) {
+ int refCount;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ assertEquals(0, refCount);
+ }
+ }
+
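+ // Helper: add two rows (ROW, ROW1) plus a second qualifier for ROW; with the
+ // 1024 byte block size the subsequent flush should create an HFile with 2 blocks.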
+ private void insertData(HTable table) throws IOException {
+ Put put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ put = new Put(ROW1);
+ put.add(FAMILY, QUALIFIER, data);
+ table.put(put);
+ byte[] QUALIFIER2 = Bytes.add(QUALIFIER, QUALIFIER);
+ put = new Put(ROW);
+ put.add(FAMILY, QUALIFIER2, data2);
+ table.put(put);
+ }
+
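+ // Helper: start NO_OF_THREADS scanner threads, optionally scanning in reverse,
+ // and return them so the caller can join on them later.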
+ private ScanThread[] initiateScan(HTable table, boolean reverse) throws IOException,
+ InterruptedException {
+ ScanThread[] scanThreads = new ScanThread[NO_OF_THREADS];
+ for (int i = 0; i < NO_OF_THREADS; i++) {
+ scanThreads[i] = new ScanThread(table, reverse);
+ }
+ for (ScanThread thread : scanThreads) {
+ thread.start();
+ }
+ return scanThreads;
+ }
+
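+ // Helper: start NO_OF_THREADS get threads; 'tracker' makes each Get ask for
+ // explicit columns and 'multipleCFs' spreads those columns across the extra
+ // column families.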
+ private GetThread[] initiateGet(HTable table, boolean tracker, boolean multipleCFs)
+ throws IOException, InterruptedException {
+ GetThread[] getThreads = new GetThread[NO_OF_THREADS];
+ for (int i = 0; i < NO_OF_THREADS; i++) {
+ getThreads[i] = new GetThread(table, tracker, multipleCFs);
+ }
+ for (GetThread thread : getThreads) {
+ thread.start();
+ }
+ return getThreads;
+ }
+
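+ // Helper: wait until all reader threads have reached the coprocessor hooks,
+ // then walk the block cache and check each non-zero ref count against the
+ // number of outstanding gets/scans (or assert zero when expectOnlyZero is set),
+ // finally counting down the shared latch.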
+ private void checkForBlockEviction(BlockCache cache, boolean getClosed, boolean expectOnlyZero,
+ boolean wrappedCp) throws InterruptedException {
+ int counter = NO_OF_THREADS;
+ if (CustomInnerRegionObserver.waitForGets.get()) {
+ // Because only one row is selected, it has only 2 blocks
+ counter = counter - 1;
+ while (CustomInnerRegionObserver.countOfGets.get() < NO_OF_THREADS) {
+ Thread.sleep(100);
+ }
+ } else {
+ while (CustomInnerRegionObserver.countOfNext.get() < NO_OF_THREADS) {
+ Thread.sleep(100);
+ }
+ }
+ Iterator<CachedBlock> iterator = cache.iterator();
+ int refCount = 0;
+ while (iterator.hasNext()) {
+ CachedBlock next = iterator.next();
+ BlockCacheKey cacheKey = new BlockCacheKey(next.getFilename(), next.getOffset());
+ if (cache instanceof BucketCache) {
+ refCount = ((BucketCache) cache).getRefCount(cacheKey);
+ } else if (cache instanceof CombinedBlockCache) {
+ refCount = ((CombinedBlockCache) cache).getRefCount(cacheKey);
+ } else {
+ continue;
+ }
+ System.out.println(" the refcount is " + refCount + " block is " + cacheKey);
+ if (CustomInnerRegionObserver.waitForGets.get()) {
+ if (expectOnlyZero) {
+ assertTrue(refCount == 0);
+ }
+ if (refCount != 0) {
+ // The scan would also have touched these blocks, but it would have
+ // touched all 3 of them.
+ if (getClosed) {
+ // If get has closed only the scan's blocks would be available
+ assertEquals(refCount, CustomInnerRegionObserver.countOfGets.get());
+ } else {
+ assertEquals(refCount, CustomInnerRegionObserver.countOfGets.get() + (NO_OF_THREADS));
+ }
+ }
+ } else {
+ // The get would also have touched these blocks, but it would have
+ // touched only 2 of them additionally.
+ if (expectOnlyZero) {
+ assertTrue(refCount == 0);
+ }
+ if (refCount != 0) {
+ if (getLatch == null || wrappedCp) {
+ assertEquals(refCount, CustomInnerRegionObserver.countOfNext.get());
+ } else {
+ assertEquals(refCount, CustomInnerRegionObserver.countOfNext.get() + (NO_OF_THREADS));
+ }
+ }
+ }
+ }
+ CustomInnerRegionObserver.getCdl().get().countDown();
+ }
+
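+ // Thread issuing a single Get against ROW and asserting the returned values.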
+ private static class GetThread extends Thread {
+ private final HTable table;
+ private final boolean tracker;
+ private final boolean multipleCFs;
+
+ public GetThread(HTable table, boolean tracker, boolean multipleCFs) {
+ this.table = table;
+ this.tracker = tracker;
+ this.multipleCFs = multipleCFs;
+ }
+
+ @Override
+ public void run() {
+ try {
+ initiateGet(table);
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+
+ private void initiateGet(HTable table) throws IOException {
+ Get get = new Get(ROW);
+ if (tracker) {
+ // Change this
+ if (!multipleCFs) {
+ get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 3));
+ get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 8));
+ get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 9));
+ // Unknown key
+ get.addColumn(FAMILY, Bytes.toBytes("testQualifier" + 900));
+ } else {
+ get.addColumn(Bytes.toBytes("testFamily" + 3), Bytes.toBytes("testQualifier" + 3));
+ get.addColumn(Bytes.toBytes("testFamily" + 8), Bytes.toBytes("testQualifier" + 8));
+ get.addColumn(Bytes.toBytes("testFamily" + 9), Bytes.toBytes("testQualifier" + 9));
+ // Unknown key
+ get.addColumn(Bytes.toBytes("testFamily" + 9), Bytes.toBytes("testQualifier" + 900));
+ }
+ }
+ CustomInnerRegionObserver.getCdl().set(latch);
+ Result r = table.get(get);
+ System.out.println(r);
+ if (!tracker) {
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER), data));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, QUALIFIER2), data2));
+ } else {
+ if (!multipleCFs) {
+ assertTrue(Bytes.equals(r.getValue(FAMILY, Bytes.toBytes("testQualifier" + 3)), data2));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, Bytes.toBytes("testQualifier" + 8)), data2));
+ assertTrue(Bytes.equals(r.getValue(FAMILY, Bytes.toBytes("testQualifier" + 9)), data2));
+ } else {
+ assertTrue(Bytes.equals(
+ r.getValue(Bytes.toBytes("testFamily" + 3), Bytes.toBytes("testQualifier" + 3)),
+ data2));
+ assertTrue(Bytes.equals(
+ r.getValue(Bytes.toBytes("testFamily" + 8), Bytes.toBytes("testQualifier" + 8)),
+ data2));
+ assertTrue(Bytes.equals(
+ r.getValue(Bytes.toBytes("testFamily" + 9), Bytes.toBytes("testQualifier" + 9)),
+ data2));
+ }
+ }
+ }
+ }
+
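+ // Thread issuing a full (optionally reversed) scan and asserting the row order.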
+ private static class ScanThread extends Thread {
+ private final HTable table;
+ private final boolean reverse;
+
+ public ScanThread(HTable table, boolean reverse) {
+ this.table = table;
+ this.reverse = reverse;
+ }
+
+ @Override
+ public void run() {
+ try {
+ initiateScan(table);
+ } catch (IOException e) {
+ // do nothing
+ }
+ }
+
+ private void initiateScan(HTable table) throws IOException {
+ Scan scan = new Scan();
+ if (reverse) {
+ scan.setReversed(true);
+ }
+ CustomInnerRegionObserver.getCdl().set(latch);
+ ResultScanner resScanner = table.getScanner(scan);
+ int i = (reverse ? ROWS.length - 1 : 0);
+ boolean resultFound = false;
+ for (Result result : resScanner) {
+ resultFound = true;
+ System.out.println(result);
+ if (!reverse) {
+ assertTrue(Bytes.equals(result.getRow(), ROWS[i]));
+ i++;
+ } else {
+ assertTrue(Bytes.equals(result.getRow(), ROWS[i]));
+ i--;
+ }
+ }
+ assertTrue(resultFound);
+ }
+ }
+
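+ // Helper: poll until the store reaches the expected store file count or the
+ // timeout (in ms) elapses, then assert the count.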
+ private void waitForStoreFileCount(Store store, int count, int timeout)
+ throws InterruptedException {
+ long start = System.currentTimeMillis();
+ while (start + timeout > System.currentTimeMillis() && store.getStorefilesCount() != count) {
+ Thread.sleep(100);
+ }
+ System.out.println("start=" + start + ", now=" + System.currentTimeMillis() + ", cur="
+ + store.getStorefilesCount());
+ assertEquals(count, store.getStorefilesCount());
+ }
+
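+ // RegionScanner wrapper installed by CustomInnerRegionObserverWrapper: it
+ // delegates to the real scanner, but nextRaw() can park on the compaction
+ // latch and can throw an IOException when throwException is set, so the test
+ // can check that blocks are released even on scanner errors.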
+ private static class CustomScanner implements RegionScanner {
+
+ private RegionScanner delegate;
+
+ public CustomScanner(RegionScanner delegate) {
+ this.delegate = delegate;
+ }
+
+ @Override
+ public boolean next(List<Cell> results) throws IOException {
+ return delegate.next(results);
+ }
+
+ @Override
+ public boolean next(List<Cell> result, ScannerContext scannerContext) throws IOException {
+ return delegate.next(result, scannerContext);
+ }
+
+ @Override
+ public boolean nextRaw(List<Cell> result) throws IOException {
+ return delegate.nextRaw(result);
+ }
+
+ @Override
+ public boolean nextRaw(List<Cell> result, ScannerContext context) throws IOException {
+ boolean nextRaw = delegate.nextRaw(result, context);
+ if (compactionLatch != null && compactionLatch.getCount() > 0) {
+ try {
+ compactionLatch.await();
+ } catch (InterruptedException ie) {
+ }
+ }
+ if (CustomInnerRegionObserver.throwException.get()) {
+ if (exceptionLatch.getCount() > 0) {
+ try {
+ exceptionLatch.await();
+ } catch (InterruptedException e) {
+ }
+ throw new IOException("throw exception");
+ }
+ }
+ return nextRaw;
+ }
+
+ @Override
+ public void close() throws IOException {
+ delegate.close();
+ }
+
+ @Override
+ public HRegionInfo getRegionInfo() {
+ return delegate.getRegionInfo();
+ }
+
+ @Override
+ public boolean isFilterDone() throws IOException {
+ return delegate.isFilterDone();
+ }
+
+ @Override
+ public boolean reseek(byte[] row) throws IOException {
+ return false;
+ }
+
+ @Override
+ public long getMaxResultSize() {
+ return delegate.getMaxResultSize();
+ }
+
+ @Override
+ public long getMvccReadPoint() {
+ return delegate.getMvccReadPoint();
+ }
+
+ @Override
+ public int getBatch() {
+ return delegate.getBatch();
+ }
+ }
+
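+ // Observer variant that additionally wraps the region scanner in CustomScanner
+ // via postScannerOpen so the test controls when next() returns.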
+ public static class CustomInnerRegionObserverWrapper extends CustomInnerRegionObserver {
+ @Override
+ public RegionScanner postScannerOpen(ObserverContext<RegionCoprocessorEnvironment> e,
+ Scan scan, RegionScanner s) throws IOException {
+ return new CustomScanner(s);
+ }
+ }
+
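+ // Coprocessor that counts gets/nexts and blocks them on a shared latch so the
+ // test can inspect block ref counts while readers are still outstanding.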
+ public static class CustomInnerRegionObserver extends BaseRegionObserver {
+ static final AtomicLong sleepTime = new AtomicLong(0);
+ static final AtomicBoolean slowDownNext = new AtomicBoolean(false);
+ static final AtomicInteger countOfNext = new AtomicInteger(0);
+ static final AtomicInteger countOfGets = new AtomicInteger(0);
+ static final AtomicBoolean waitForGets = new AtomicBoolean(false);
+ static final AtomicBoolean throwException = new AtomicBoolean(false);
+ private static final AtomicReference<CountDownLatch> cdl = new AtomicReference<CountDownLatch>(
+ new CountDownLatch(0));
+
+ @Override
+ public boolean postScannerNext(ObserverContext<RegionCoprocessorEnvironment> e,
+ InternalScanner s, List<Result> results, int limit, boolean hasMore) throws IOException {
+ slowdownCode(e, false);
+ if (getLatch != null && getLatch.getCount() > 0) {
+ try {
+ getLatch.await();
+ } catch (InterruptedException e1) {
+ }
+ }
+ return super.postScannerNext(e, s, results, limit, hasMore);
+ }
+
+ @Override
+ public void postGetOp(ObserverContext<RegionCoprocessorEnvironment> e, Get get,
+ List<Cell> results) throws IOException {
+ slowdownCode(e, true);
+ super.postGetOp(e, get, results);
+ }
+
+ public static AtomicReference<CountDownLatch> getCdl() {
+ return cdl;
+ }
+
+ private void slowdownCode(final ObserverContext<RegionCoprocessorEnvironment> e, boolean isGet) {
+ CountDownLatch latch = getCdl().get();
+ try {
+ System.out.println(latch.getCount() + " is the count " + isGet);
+ if (latch.getCount() > 0) {
+ if (isGet) {
+ countOfGets.incrementAndGet();
+ } else {
+ countOfNext.incrementAndGet();
+ }
+ LOG.info("Waiting for the counterCountDownLatch");
+ latch.await(2, TimeUnit.MINUTES); // To help the tests to finish.
+ if (latch.getCount() > 0) {
+ throw new RuntimeException("Can't wait more");
+ }
+ }
+ } catch (InterruptedException e1) {
+ LOG.error(e1);
+ }
+ }
+ }
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java
index 55f8dda..d6e8784 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestByteBufferOutputStream.java
@@ -43,7 +43,7 @@ public class TestByteBufferOutputStream {
bbos.write(bytes);
assertTrue(Bytes.compareTo(bytes, bbos.toByteArray(0, bytes.length)) == 0);
bbos.flush();
- return bbos.getByteBuffer();
+ return bbos.flipAndReturnBuffer();
}
}
}
\ No newline at end of file
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
index c410c13..7bc7b84 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheOnWrite.java
@@ -28,8 +28,13 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
+import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
import java.util.Random;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -290,6 +295,8 @@ public class TestCacheOnWrite {
DataBlockEncoding encodingInCache =
encoderType.getEncoder().getDataBlockEncoding();
+ List<Long> cachedBlocksOffset = new ArrayList<Long>();
+ Map<Long, HFileBlock> cachedBlocks = new HashMap<Long, HFileBlock>();
while (offset < reader.getTrailer().getLoadOnOpenDataOffset()) {
long onDiskSize = -1;
if (prevBlock != null) {
@@ -303,6 +310,8 @@ public class TestCacheOnWrite {
offset);
HFileBlock fromCache = (HFileBlock) blockCache.getBlock(blockCacheKey, true, false, true);
boolean isCached = fromCache != null;
+ cachedBlocksOffset.add(offset);
+ cachedBlocks.put(offset, fromCache);
boolean shouldBeCached = cowType.shouldBeCached(block.getBlockType());
assertTrue("shouldBeCached: " + shouldBeCached+ "\n" +
"isCached: " + isCached + "\n" +
@@ -355,6 +364,27 @@ public class TestCacheOnWrite {
while (scanner.next()) {
scanner.getKeyValue();
}
+ Iterator<Long> iterator = cachedBlocksOffset.iterator();
+ while(iterator.hasNext()) {
+ Long entry = iterator.next();
+ BlockCacheKey blockCacheKey = new BlockCacheKey(reader.getName(),
+ entry);
+ HFileBlock hFileBlock = cachedBlocks.get(entry);
+ if (hFileBlock != null) {
+ // Call returnBlock twice because, for the cached case, the counter would
+ // have been incremented twice.
+ blockCache.returnBlock(blockCacheKey, hFileBlock);
+ if(cacheCompressedData) {
+ if (this.compress == Compression.Algorithm.NONE
+ || cowType == CacheOnWriteType.INDEX_BLOCKS
+ || cowType == CacheOnWriteType.BLOOM_BLOCKS) {
+ blockCache.returnBlock(blockCacheKey, hFileBlock);
+ }
+ } else {
+ blockCache.returnBlock(blockCacheKey, hFileBlock);
+ }
+ }
+ }
+ scanner.returnCurrentBatch();
reader.close();
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
index 8f42e6b..9cb6150 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFile.java
@@ -329,7 +329,7 @@ public class TestHFile extends HBaseTestCase {
private void readNumMetablocks(Reader reader, int n) throws IOException {
for (int i = 0; i < n; i++) {
- ByteBuffer actual = reader.getMetaBlock("HFileMeta" + i, false);
+ ByteBuffer actual = reader.getMetaBlock("HFileMeta" + i, false).getBufferWithoutHeader();
ByteBuffer expected =
ByteBuffer.wrap(("something to test" + i).getBytes());
assertEquals("failed to match metadata",
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
index 8891a6a..4e2a7a8 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
@@ -167,6 +167,10 @@ public class TestHFileBlockIndex {
}
@Override
+ public void returnBlock(HFileBlock block) {
+ }
+
+ @Override
public HFileBlock readBlock(long offset, long onDiskSize,
boolean cacheBlock, boolean pread, boolean isCompaction,
boolean updateCacheMetrics, BlockType expectedBlockType,
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
index 511f942..9aa23c7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestByteBufferIOEngine.java
@@ -22,8 +22,10 @@ import static org.junit.Assert.assertTrue;
import java.nio.ByteBuffer;
+import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Pair;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -63,8 +65,8 @@ public class TestByteBufferIOEngine {
offset = (int) (Math.random() * (capacity - maxBlockSize));
}
ioEngine.write(srcBuffer, offset);
- ByteBuffer dstBuffer = ByteBuffer.allocate(blockSize);
- ioEngine.read(dstBuffer, offset);
+ Pair<ByteBuffer, MemoryType> pair = ioEngine.read(offset, blockSize);
+ ByteBuffer dstBuffer = pair.getFirst();
byte[] byteArray2 = dstBuffer.array();
for (int j = 0; j < byteArray.length; ++j) {
assertTrue(byteArray[j] == byteArray2[j]);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
index 8306114..e8ea8cc 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestFileIOEngine.java
@@ -24,8 +24,10 @@ import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
+import org.apache.hadoop.hbase.io.hfile.BlockCache.MemoryType;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.apache.hadoop.hbase.util.Pair;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -47,9 +49,9 @@ public class TestFileIOEngine {
for (int j = 0; j < data1.length; ++j) {
data1[j] = (byte) (Math.random() * 255);
}
- byte[] data2 = new byte[len];
fileIOEngine.write(ByteBuffer.wrap(data1), offset);
- fileIOEngine.read(ByteBuffer.wrap(data2), offset);
+ Pair<ByteBuffer, MemoryType> pair = fileIOEngine.read(offset, len);
+ byte[] data2 = pair.getFirst().array();
for (int j = 0; j < data1.length; ++j) {
assertTrue(data1[j] == data2[j]);
}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java
index 32eb9f6..104c09c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/AbstractTestIPC.java
@@ -27,6 +27,7 @@ import static org.mockito.internal.verification.VerificationModeFactory.times;
import java.io.IOException;
import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
@@ -47,6 +48,7 @@ import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.util.StringUtils;
import org.junit.Test;
@@ -137,7 +139,7 @@ public abstract class AbstractTestIPC {
}
@Override
- public Pair call(BlockingService service, MethodDescriptor md,
+ public Triple call(BlockingService service, MethodDescriptor md,
Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status)
throws IOException {
return super.call(service, md, param, cellScanner, receiveTime, status);
@@ -204,8 +206,8 @@ public abstract class AbstractTestIPC {
PayloadCarryingRpcController pcrc =
new PayloadCarryingRpcController(CellUtil.createCellScanner(cells));
- Pair<Message, CellScanner> r =
- client.call(pcrc, md, param, md.getOutputType().toProto(), User.getCurrent(), address);
+ Pair<Message, CellScanner> r = client.call(pcrc, md, param, md.getOutputType().toProto(),
+ User.getCurrent(), address);
int index = 0;
while (r.getSecond().advance()) {
assertTrue(CELL.equals(r.getSecond().current()));
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java
index 8a6b092..3e8f0ea 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/ipc/TestRpcHandlerException.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.testclassification.RPCTests;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Pair;
+import org.apache.hadoop.hbase.util.Triple;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.experimental.categories.Category;
@@ -51,6 +52,7 @@ import org.mockito.Mockito;
import java.io.IOException;
import java.net.InetSocketAddress;
+import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
@@ -136,7 +138,7 @@ public class TestRpcHandlerException {
}
@Override
- public Pair call(BlockingService service, MethodDescriptor md,
+ public Triple call(BlockingService service, MethodDescriptor md,
Message param, CellScanner cellScanner, long receiveTime, MonitoredRPCHandler status)
throws IOException {
return super.call(service, md, param, cellScanner, receiveTime, status);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java
index 2965071..03e6df7 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHeapMemoryManager.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.hbase.io.hfile.BlockCacheKey;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
import org.apache.hadoop.hbase.io.hfile.Cacheable;
import org.apache.hadoop.hbase.io.hfile.CachedBlock;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.ResizableBlockCache;
import org.apache.hadoop.hbase.io.util.HeapMemorySizeUtil;
import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext;
@@ -400,6 +401,11 @@ public class TestHeapMemoryManager {
public BlockCache[] getBlockCaches() {
return null;
}
+
+ @Override
+ public boolean returnBlock(BlockCacheKey cacheKey, HFileBlock block) {
+ return true;
+ }
}
private static class MemstoreFlusherStub implements FlushRequester {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java
index a3c6685..8442def 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScannerHeartbeatMessages.java
@@ -425,14 +425,14 @@ public class TestScannerHeartbeatMessages {
// Instantiate the custom heartbeat region scanners
@Override
protected RegionScanner instantiateRegionScanner(Scan scan,
- List<KeyValueScanner> additionalScanners) throws IOException {
+ List<KeyValueScanner> additionalScanners, boolean cellBlock) throws IOException {
if (scan.isReversed()) {
if (scan.getFilter() != null) {
scan.getFilter().setReversed(true);
}
- return new HeartbeatReversedRegionScanner(scan, additionalScanners, this);
+ return new HeartbeatReversedRegionScanner(scan, additionalScanners, this, cellBlock);
}
- return new HeartbeatRegionScanner(scan, additionalScanners, this);
+ return new HeartbeatRegionScanner(scan, additionalScanners, this, cellBlock);
}
}
@@ -442,8 +442,8 @@ public class TestScannerHeartbeatMessages {
*/
private static class HeartbeatReversedRegionScanner extends ReversedRegionScannerImpl {
HeartbeatReversedRegionScanner(Scan scan, List<KeyValueScanner> additionalScanners,
- HRegion region) throws IOException {
- super(scan, additionalScanners, region);
+ HRegion region, boolean cellBlock) throws IOException {
+ super(scan, additionalScanners, region, cellBlock);
}
@Override
@@ -469,9 +469,9 @@ public class TestScannerHeartbeatMessages {
* column family cells
*/
private static class HeartbeatRegionScanner extends RegionScannerImpl {
- HeartbeatRegionScanner(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region)
- throws IOException {
- region.super(scan, additionalScanners, region);
+ HeartbeatRegionScanner(Scan scan, List<KeyValueScanner> additionalScanners, HRegion region,
+ boolean cellBlock) throws IOException {
+ region.super(scan, additionalScanners, region, cellBlock);
}
@Override