From f7ac9216a9f9cdd013fe175a83ef3ac334fe921d Mon Sep 17 00:00:00 2001
From: huzheng
Date: Mon, 7 Jan 2019 14:41:18 +0800
Subject: [PATCH] HBASE-21657 PrivateCellUtil#estimatedSerializedSizeOf has been the bottleneck in 100% scan case

---
 .../hadoop/hbase/client/ConnectionUtils.java       |  2 +-
 .../org/apache/hadoop/hbase/client/Mutation.java   |  4 +-
 .../org/apache/hadoop/hbase/client/Result.java     |  2 +-
 .../apache/hadoop/hbase/filter/KeyOnlyFilter.java  | 10 ++++
 .../hbase/ipc/TestHBaseRpcControllerImpl.java      | 10 ++++
 .../apache/hadoop/hbase/ByteBufferKeyValue.java    |  5 ++
 .../main/java/org/apache/hadoop/hbase/Cell.java    |  8 +++-
 .../java/org/apache/hadoop/hbase/KeyValue.java     |  5 ++
 .../java/org/apache/hadoop/hbase/KeyValueUtil.java |  5 +-
 .../hadoop/hbase/NoTagsByteBufferKeyValue.java     | 10 ++++
 .../org/apache/hadoop/hbase/PrivateCellUtil.java   | 53 ++--------------------
 .../apache/hadoop/hbase/SizeCachedKeyValue.java    |  5 ++
 .../hadoop/hbase/SizeCachedNoTagsKeyValue.java     | 10 ++++
 .../hadoop/hbase/io/encoding/RowIndexSeekerV1.java |  5 +-
 .../java/org/apache/hadoop/hbase/TestCellUtil.java | 20 ++++++++
 .../hadoop/hbase/util/MapReduceExtendedCell.java   |  2 +-
 .../hadoop/hbase/io/hfile/HFileBlockIndex.java     |  2 +-
 .../hadoop/hbase/io/hfile/HFileReaderImpl.java     |  4 +-
 .../apache/hadoop/hbase/regionserver/HRegion.java  |  2 +-
 .../hadoop/hbase/regionserver/RSRpcServices.java   | 26 +++++++----
 .../apache/hadoop/hbase/regionserver/Segment.java  |  4 +-
 .../hadoop/hbase/regionserver/StoreScanner.java    |  3 +-
 .../java/org/apache/hadoop/hbase/wal/WALEdit.java  |  2 +-
 .../apache/hadoop/hbase/wal/WALPrettyPrinter.java  |  2 +-
 .../hbase/TestPartialResultsFromClientSide.java    |  3 +-
 .../TestServerSideScanMetricsFromClientSide.java   |  2 +-
 .../TestPassCustomCellViaRegionObserver.java       | 11 +++++
 27 files changed, 139 insertions(+), 78 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java
index 63ef865..1b84dfb 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ConnectionUtils.java
@@ -320,7 +320,7 @@ public final class ConnectionUtils {
     long estimatedHeapSizeOfResult = 0;
     // We don't make Iterator here
     for (Cell cell : rs.rawCells()) {
-      estimatedHeapSizeOfResult += PrivateCellUtil.estimatedSizeOfCell(cell);
+      estimatedHeapSizeOfResult += cell.heapSize();
     }
     return estimatedHeapSizeOfResult;
   }
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java
index dc8199d..8bfdf89 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Mutation.java
@@ -499,8 +499,8 @@ public abstract class Mutation extends OperationWithAttributes implements Row, C
       heapsize += ClassSize.align(ClassSize.ARRAY + size * ClassSize.REFERENCE);
 
-      for(Cell cell : entry.getValue()) {
-        heapsize += PrivateCellUtil.estimatedSizeOfCell(cell);
+      for (Cell cell : entry.getValue()) {
+        heapsize += cell.heapSize();
       }
     }
 
     heapsize += getAttributeSize();
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java
index 832689e..5d56e83 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Result.java
@@ -859,7 +859,7 @@ public class Result implements CellScannable, CellScanner {
       return size;
     }
     for (Cell c : result.rawCells()) {
-      size += PrivateCellUtil.estimatedSizeOfCell(c);
+      size += c.heapSize();
     }
     return size;
   }
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/KeyOnlyFilter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/KeyOnlyFilter.java
index a66441b..0fa71de 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/KeyOnlyFilter.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/KeyOnlyFilter.java
@@ -232,6 +232,11 @@ public class KeyOnlyFilter extends FilterBase {
     }
 
     @Override
+    public int getSerializedSize() {
+      return cell.getSerializedSize();
+    }
+
+    @Override
     public byte[] getTagsArray() {
       return HConstants.EMPTY_BYTE_ARRAY;
     }
@@ -245,6 +250,11 @@ public class KeyOnlyFilter extends FilterBase {
     public int getTagsLength() {
       return 0;
     }
+
+    @Override
+    public long heapSize() {
+      return cell.heapSize();
+    }
   }
 
   static class KeyOnlyByteBufferExtendedCell extends ByteBufferExtendedCell {
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestHBaseRpcControllerImpl.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestHBaseRpcControllerImpl.java
index 2c1dd4f..66c81df 100644
--- a/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestHBaseRpcControllerImpl.java
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/ipc/TestHBaseRpcControllerImpl.java
@@ -75,6 +75,11 @@ public class TestHBaseRpcControllerImpl {
       // Fake out a Cell. All this Cell has is a value that is an int in size and equal
       // to the above 'index' param serialized as an int.
       return new Cell() {
+        @Override
+        public long heapSize() {
+          return 0;
+        }
+
         private final int i = index;
 
         @Override
@@ -165,6 +170,11 @@ public class TestHBaseRpcControllerImpl {
         }
 
         @Override
+        public int getSerializedSize() {
+          return 0;
+        }
+
+        @Override
        public int getTagsOffset() {
          // unused
          return 0;
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyValue.java
index 963b411..cafeb3e 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyValue.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/ByteBufferKeyValue.java
@@ -287,6 +287,11 @@ public class ByteBufferKeyValue extends ByteBufferExtendedCell {
   }
 
   @Override
+  public int getSerializedSize() {
+    return this.length;
+  }
+
+  @Override
   public void write(ByteBuffer buf, int offset) {
     ByteBufferUtils.copyFromBufferToBuffer(this.buf, buf, this.offset, offset, this.length);
   }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/Cell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/Cell.java
index 8cdefff..7db5fe3 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/Cell.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/Cell.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hbase;
 
+import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.yetus.audience.InterfaceAudience;
 
 
@@ -59,7 +60,7 @@ import org.apache.yetus.audience.InterfaceAudience;
  *
 *
 */
 @InterfaceAudience.Public
-public interface Cell {
+public interface Cell extends HeapSize {
 
   //1) Row
@@ -172,6 +173,11 @@ public interface Cell {
   int getValueLength();
 
   /**
+   * @return Serialized size (defaults to include tag length).
+   */
+  int getSerializedSize();
+
+  /**
    * Contiguous raw bytes representing tags that may start at any index in the containing array.
    * @return the tags byte array
    * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal.
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
index bdaefff..ff09ea6 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
@@ -2323,6 +2323,11 @@ public class KeyValue implements ExtendedCell, Cloneable {
   }
 
   @Override
+  public int getSerializedSize() {
+    return this.length;
+  }
+
+  @Override
   public void write(ByteBuffer buf, int offset) {
     ByteBufferUtils.copyFromArrayToBuffer(buf, offset, this.bytes, this.offset, this.length);
   }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
index 16ebdbf..1230469 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValueUtil.java
@@ -746,11 +746,14 @@ public class KeyValueUtil {
   }
 
   public static int getSerializedSize(Cell cell, boolean withTags) {
+    if (withTags) {
+      return cell.getSerializedSize();
+    }
     if (cell instanceof ExtendedCell) {
       return ((ExtendedCell) cell).getSerializedSize(withTags);
     }
     return length(cell.getRowLength(), cell.getFamilyLength(), cell.getQualifierLength(),
-        cell.getValueLength(), cell.getTagsLength(), withTags);
+      cell.getValueLength(), cell.getTagsLength(), withTags);
   }
 
   public static int oswrite(final Cell cell, final OutputStream out, final boolean withTags)
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/NoTagsByteBufferKeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/NoTagsByteBufferKeyValue.java
index f00961f..b37a266 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/NoTagsByteBufferKeyValue.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/NoTagsByteBufferKeyValue.java
@@ -51,6 +51,16 @@ public class NoTagsByteBufferKeyValue extends ByteBufferKeyValue {
     return this.length;
   }
 
+  /**
+   * Override by just returning the length for saving cost of method dispatching. If not, it will
+   * call {@link ExtendedCell#getSerializedSize()} firstly, then forward to
+   * {@link NoTagsByteBufferKeyValue#getSerializedSize(boolean)} (See HBASE-21657)
+   */
+  @Override
+  public int getSerializedSize() {
+    return this.length;
+  }
+
   @Override
   public ExtendedCell deepClone() {
     byte[] copy = new byte[this.length];
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java
index 523cc8e..ded1435 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/PrivateCellUtil.java
@@ -31,7 +31,6 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Optional;
 import org.apache.hadoop.hbase.filter.ByteArrayComparable;
-import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.TagCompressionContext;
 import org.apache.hadoop.hbase.io.util.Dictionary;
 import org.apache.hadoop.hbase.io.util.StreamUtils;
@@ -250,7 +249,7 @@ public final class PrivateCellUtil {
 
     @Override
     public long heapSize() {
-      long sum = HEAP_SIZE_OVERHEAD + estimatedSizeOfCell(cell);
+      long sum = HEAP_SIZE_OVERHEAD + cell.heapSize();
       if (this.tags != null) {
         sum += ClassSize.sizeOf(this.tags);
       }
@@ -446,7 +445,7 @@
 
     @Override
     public long heapSize() {
-      long sum = HEAP_SIZE_OVERHEAD + estimatedSizeOfCell(cell);
+      long sum = HEAP_SIZE_OVERHEAD + cell.heapSize();
       // this.tags is on heap byte[]
       if (this.tags != null) {
         sum += ClassSize.sizeOf(this.tags);
@@ -705,7 +704,7 @@
 
     @Override
     public ExtendedCell deepClone() {
-      Cell clonedBaseCell = ((ExtendedCell) this.cell).deepClone();
+      Cell clonedBaseCell = this.cell.deepClone();
       if (clonedBaseCell instanceof ByteBufferExtendedCell) {
         return new ValueAndTagRewriteByteBufferExtendedCell(
           (ByteBufferExtendedCell) clonedBaseCell, this.value, this.tags);
@@ -2737,34 +2736,7 @@
    * actual cell length.
    */
   public static int estimatedSerializedSizeOf(final Cell cell) {
-    if (cell instanceof ExtendedCell) {
-      return ((ExtendedCell) cell).getSerializedSize(true) + Bytes.SIZEOF_INT;
-    }
-
-    return getSumOfCellElementLengths(cell) +
-      // Use the KeyValue's infrastructure size presuming that another implementation would have
-      // same basic cost.
-      KeyValue.ROW_LENGTH_SIZE + KeyValue.FAMILY_LENGTH_SIZE +
-      // Serialization is probably preceded by a length (it is in the KeyValueCodec at least).
-      Bytes.SIZEOF_INT;
-  }
-
-  /**
-   * @param cell
-   * @return Sum of the lengths of all the elements in a Cell; does not count in any infrastructure
-   */
-  private static int getSumOfCellElementLengths(final Cell cell) {
-    return getSumOfCellKeyElementLengths(cell) + cell.getValueLength() + cell.getTagsLength();
-  }
-
-  /**
-   * @param cell
-   * @return Sum of all elements that make up a key; does not include infrastructure, tags or
-   *         values.
-   */
-  private static int getSumOfCellKeyElementLengths(final Cell cell) {
-    return cell.getRowLength() + cell.getFamilyLength() + cell.getQualifierLength()
-      + KeyValue.TIMESTAMP_TYPE_SIZE;
+    return cell.getSerializedSize() + Bytes.SIZEOF_INT;
   }
 
   /**
@@ -2779,23 +2751,6 @@
   }
 
   /**
-   * This is an estimate of the heap space occupied by a cell. When the cell is of type
-   * {@link HeapSize} we call {@link HeapSize#heapSize()} so cell can give a correct value. In other
-   * cases we just consider the bytes occupied by the cell components ie. row, CF, qualifier,
-   * timestamp, type, value and tags.
-   * Note that this can be the JVM heap space (on-heap) or the OS heap (off-heap)
-   * @param cell
-   * @return estimate of the heap space
-   */
-  public static long estimatedSizeOfCell(final Cell cell) {
-    if (cell instanceof HeapSize) {
-      return ((HeapSize) cell).heapSize();
-    }
-    // TODO: Add sizing of references that hold the row, family, etc., arrays.
-    return estimatedSerializedSizeOf(cell);
-  }
-
-  /**
    * This method exists just to encapsulate how we serialize keys. To be replaced by a factory that
    * we query to figure what the Cell implementation is and then, what serialization engine to use
    * and further, how to serialize the key for inclusion in hfile index. TODO.
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedKeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedKeyValue.java
index aa649c7..3b8070e 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedKeyValue.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedKeyValue.java
@@ -62,4 +62,9 @@ public class SizeCachedKeyValue extends KeyValue {
   public long heapSize() {
     return super.heapSize() + FIXED_OVERHEAD;
   }
+
+  @Override
+  public int getSerializedSize() {
+    return this.length;
+  }
 }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedNoTagsKeyValue.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedNoTagsKeyValue.java
index 88b6177..29ce9cb 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedNoTagsKeyValue.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/SizeCachedNoTagsKeyValue.java
@@ -51,4 +51,14 @@ public class SizeCachedNoTagsKeyValue extends SizeCachedKeyValue {
   public int getSerializedSize(boolean withTags) {
     return this.length;
   }
+
+  /**
+   * Override by just returning the length for saving cost of method dispatching. If not, it will
+   * call {@link ExtendedCell#getSerializedSize()} firstly, then forward to
+   * {@link SizeCachedNoTagsKeyValue#getSerializedSize(boolean)}. (See HBASE-21657)
+   */
+  @Override
+  public int getSerializedSize() {
+    return this.length;
+  }
 }
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java
index 5c7ca53..2d484b9 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/RowIndexSeekerV1.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.NoTagsByteBufferKeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.SizeCachedKeyValue;
 import org.apache.hadoop.hbase.SizeCachedNoTagsKeyValue;
@@ -383,7 +384,9 @@ public class RowIndexSeekerV1 extends AbstractEncodedSeeker {
       currentBuffer.asSubByteBuffer(startOffset, cellBufSize, tmpPair);
       ByteBuffer buf = tmpPair.getFirst();
       if (buf.isDirect()) {
-        ret = new ByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId);
+        ret =
+            tagsLength > 0 ? new ByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId)
+                : new NoTagsByteBufferKeyValue(buf, tmpPair.getSecond(), cellBufSize, seqId);
       } else {
         if (tagsLength > 0) {
           ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset()
diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
index 069bcfb..9d4d48f 100644
--- a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
+++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestCellUtil.java
@@ -180,6 +180,11 @@ public class TestCellUtil {
     }
 
     @Override
+    public int getSerializedSize() {
+      return 0;
+    }
+
+    @Override
     public byte[] getTagsArray() {
       // TODO Auto-generated method stub
       return null;
@@ -202,6 +207,11 @@ public class TestCellUtil {
       // TODO Auto-generated method stub
       return 0;
     }
+
+    @Override
+    public long heapSize() {
+      return 0;
+    }
   }
 
   /**
@@ -631,6 +641,11 @@ public class TestCellUtil {
     }
 
     @Override
+    public int getSerializedSize() {
+      return this.kv.getSerializedSize();
+    }
+
+    @Override
     public byte[] getTagsArray() {
       return this.kv.getTagsArray();
     }
@@ -644,5 +659,10 @@ public class TestCellUtil {
     public int getTagsLength() {
       return this.kv.getTagsLength();
     }
+
+    @Override
+    public long heapSize() {
+      return this.kv.heapSize();
+    }
   }
 }
diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapReduceExtendedCell.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapReduceExtendedCell.java
index 75b57f4..175c004 100644
--- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapReduceExtendedCell.java
+++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/util/MapReduceExtendedCell.java
@@ -241,7 +241,7 @@ public class MapReduceExtendedCell extends ByteBufferExtendedCell {
 
   @Override
   public long heapSize() {
-    return PrivateCellUtil.estimatedSizeOfCell(cell);
+    return cell.heapSize();
   }
 
   @Override
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
index e8818be..4d02316 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
@@ -266,7 +266,7 @@ public class HFileBlockIndex {
 
       // Adding blockKeys
       for (Cell key : blockKeys) {
-        heapSize += ClassSize.align(PrivateCellUtil.estimatedSizeOfCell(key));
+        heapSize += ClassSize.align(key.heapSize());
       }
     }
     // Add comparator and the midkey atomicreference
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
index a4a40ba..450e6de 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java
@@ -34,6 +34,7 @@ import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.NoTagsByteBufferKeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.ByteBufferKeyValue;
@@ -964,7 +965,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable {
       } else {
         ByteBuffer buf = blockBuffer.asSubByteBuffer(cellBufSize);
         if (buf.isDirect()) {
-          ret = new ByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId);
+          ret = currTagsLen > 0 ? new ByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId)
+              : new NoTagsByteBufferKeyValue(buf, buf.position(), cellBufSize, seqId);
         } else {
           if (currTagsLen > 0) {
             ret = new SizeCachedKeyValue(buf.array(), buf.arrayOffset() + buf.position(),
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index de1372d..00f73c7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -6652,7 +6652,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi
         scannerContext.incrementBatchProgress(results.size());
         for (Cell cell : results) {
           scannerContext.incrementSizeProgress(PrivateCellUtil.estimatedSerializedSizeOf(cell),
-            PrivateCellUtil.estimatedSizeOfCell(cell));
+            cell.heapSize());
         }
       }
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 4761e4d..fd69f59 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -1343,14 +1343,24 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
     return 0L;
   }
 
+  Object addSize(RpcCallContext context, Result r, Object lastBlock) {
+    return addSize(context, r, lastBlock, false);
+  }
+
   /**
    * Method to account for the size of retained cells and retained data blocks.
+   * @param context rpc call context
+   * @param r result to add size.
+   * @param lastBlock last block to check whether we need to add the block size in context.
+   * @param skipSumUpCellSize true to skip to sum up the cell size of context.
    * @return an object that represents the last referenced block from this response.
    */
-  Object addSize(RpcCallContext context, Result r, Object lastBlock) {
+  Object addSize(RpcCallContext context, Result r, Object lastBlock, boolean skipSumUpCellSize) {
     if (context != null && r != null && !r.isEmpty()) {
       for (Cell c : r.rawCells()) {
-        context.incrementResponseCellSize(PrivateCellUtil.estimatedSerializedSizeOf(c));
+        if (!skipSumUpCellSize) {
+          context.incrementResponseCellSize(PrivateCellUtil.estimatedSerializedSizeOf(c));
+        }
 
         // Since byte buffers can point all kinds of crazy places it's harder to keep track
         // of which blocks are kept alive by what byte buffer.
@@ -3044,7 +3054,7 @@ public class RSRpcServices implements HBaseRPCErrorHandler,
   // return whether we have more results in region.
   private void scan(HBaseRpcController controller, ScanRequest request, RegionScannerHolder rsh,
       long maxQuotaResultSize, int maxResults, int limitOfRows, List results,
-      ScanResponse.Builder builder, MutableObject lastBlock, RpcCallContext context)
+      ScanResponse.Builder builder, MutableObject lastBlock, RpcCallContext context)
       throws IOException {
     HRegion region = rsh.r;
     RegionScanner scanner = rsh.s;
@@ -3116,6 +3126,7 @@
           // batch progress from previous calls to affect future calls
           scannerContext.setBatchProgress(0);
 
+          long initDataSize = scannerContext.getDataSizeProgress();
           // Collect values to be returned here
           moreRows = scanner.nextRaw(values, scannerContext);
 
@@ -3145,7 +3156,8 @@
           }
           boolean mayHaveMoreCellsInRow = scannerContext.mayHaveMoreCellsInRow();
           Result r = Result.create(values, null, stale, mayHaveMoreCellsInRow);
-          lastBlock.setValue(addSize(context, r, lastBlock.getValue()));
+          context.incrementResponseCellSize(scannerContext.getDataSizeProgress() - initDataSize);
+          lastBlock.setValue(addSize(context, r, lastBlock.getValue(), true));
           results.add(r);
           numOfResults++;
           if (!mayHaveMoreCellsInRow && limitOfRows > 0) {
@@ -3174,10 +3186,6 @@
         limitReached = sizeLimitReached || timeLimitReached || resultsLimitReached;
 
         if (limitReached || !moreRows) {
-          if (LOG.isTraceEnabled()) {
-            LOG.trace("Done scanning. limitReached: " + limitReached + " moreRows: " + moreRows
-              + " scannerContext: " + scannerContext);
-          }
           // We only want to mark a ScanResponse as a heartbeat message in the event that
           // there are more values to be read server side. If there aren't more values,
           // marking it as a heartbeat is wasteful because the client will need to issue
@@ -3350,7 +3358,7 @@
     MutableObject lastBlock = new MutableObject<>();
     boolean scannerClosed = false;
     try {
-      List results = new ArrayList<>();
+      List results = new ArrayList<>(Math.min(rows, 512));
      if (rows > 0) {
        boolean done = false;
        // Call coprocessor. Get region info from scanner.
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Segment.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Segment.java
index 81efae6..e68ce80 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Segment.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/Segment.java
@@ -349,7 +349,7 @@ public abstract class Segment implements MemStoreSizing {
     }
     res += indexEntryOnHeapSize(onHeap);
     if(onHeap) {
-      res += PrivateCellUtil.estimatedSizeOfCell(cell);
+      res += cell.heapSize();
     }
     res = ClassSize.align(res);
   }
@@ -366,7 +366,7 @@
     }
     res += indexEntryOffHeapSize(offHeap);
     if(offHeap) {
-      res += PrivateCellUtil.estimatedSizeOfCell(cell);
+      res += cell.heapSize();
     }
     res = ClassSize.align(res);
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
index 7bf12f7..29369c7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@@ -605,8 +605,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner
           totalBytesRead += cellSize;
 
           // Update the progress of the scanner context
-          scannerContext.incrementSizeProgress(cellSize,
-            PrivateCellUtil.estimatedSizeOfCell(cell));
+          scannerContext.incrementSizeProgress(cellSize, cell.heapSize());
           scannerContext.incrementBatchProgress(1);
 
           if (matcher.isUserScan() && totalBytesRead > maxRowSize) {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALEdit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALEdit.java
index 3d90a45..7c28143 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALEdit.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALEdit.java
@@ -218,7 +218,7 @@ public class WALEdit implements HeapSize {
   public long heapSize() {
     long ret = ClassSize.ARRAYLIST;
     for (Cell cell : cells) {
-      ret += PrivateCellUtil.estimatedSizeOfCell(cell);
+      ret += cell.heapSize();
     }
     return ret;
   }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALPrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALPrettyPrinter.java
index 45934d4..e7c4c5b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALPrettyPrinter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALPrettyPrinter.java
@@ -308,7 +308,7 @@ public class WALPrettyPrinter {
         if (row == null || ((String) op.get("row")).equals(row)) {
           actions.add(op);
         }
-        op.put("total_size_sum", PrivateCellUtil.estimatedSizeOfCell(cell));
+        op.put("total_size_sum", cell.heapSize());
       }
       if (actions.isEmpty()) continue;
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
index 965243f..44b3c7b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
@@ -391,8 +391,7 @@ public class TestPartialResultsFromClientSide {
     // Estimate the cell heap size. One difference is that on server side, the KV Heap size is
    // estimated differently in case the cell is backed up by MSLAB byte[] (no overhead for
    // backing array). Thus below calculation is a bit brittle.
-    CELL_HEAP_SIZE = PrivateCellUtil.estimatedSizeOfCell(result.rawCells()[0])
-        - (ClassSize.ARRAY+3);
+    CELL_HEAP_SIZE = result.rawCells()[0].heapSize() - (ClassSize.ARRAY + 3);
     if (LOG.isInfoEnabled()) LOG.info("Cell heap size: " + CELL_HEAP_SIZE);
     scanner.close();
   }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestServerSideScanMetricsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestServerSideScanMetricsFromClientSide.java
index 5a3ba82..da84f2f 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestServerSideScanMetricsFromClientSide.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestServerSideScanMetricsFromClientSide.java
@@ -155,7 +155,7 @@ public class TestServerSideScanMetricsFromClientSide {
     assertTrue(result.rawCells() != null);
     assertTrue(result.rawCells().length == 1);
 
-    CELL_HEAP_SIZE = PrivateCellUtil.estimatedSizeOfCell(result.rawCells()[0]);
+    CELL_HEAP_SIZE = result.rawCells()[0].heapSize();
     scanner.close();
   }
 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestPassCustomCellViaRegionObserver.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestPassCustomCellViaRegionObserver.java
index fcfd4f6..d55e8e0 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestPassCustomCellViaRegionObserver.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/coprocessor/TestPassCustomCellViaRegionObserver.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hbase.CompareOperator;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.KeyValueUtil;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.Admin;
 import org.apache.hadoop.hbase.client.Append;
@@ -213,6 +214,11 @@ public class TestPassCustomCellViaRegionObserver {
       Cell.Type type, byte[] value) {
     return new Cell() {
+      @Override
+      public long heapSize() {
+        return 0;
+      }
+
       private byte[] getArray(byte[] array) {
         return array == null ? HConstants.EMPTY_BYTE_ARRAY : array;
       }
 
@@ -297,6 +303,11 @@ public class TestPassCustomCellViaRegionObserver {
      }
 
      @Override
+      public int getSerializedSize() {
+        return KeyValueUtil.getSerializedSize(this, true);
+      }
+
+      @Override
       public byte[] getTagsArray() {
         return getArray(null);
       }
-- 
2.7.4
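
Illustrative sketch (not part of the patch): the change works because concrete cells already know their serialized length, so Cell#getSerializedSize() and Cell#heapSize() become cheap interface calls instead of instanceof checks plus per-component length sums in PrivateCellUtil. The class below is hypothetical and only demonstrates that pattern, assuming a KeyValue-style cell backed by a single byte[] whose length is known up front.

    // Hypothetical, simplified example -- not HBase code.
    public class LengthCachedCell {
      private final byte[] bytes; // KeyValue-style serialization, tags included
      private final int length;   // cached serialized length

      public LengthCachedCell(byte[] bytes, int length) {
        this.bytes = bytes;
        this.length = length;
      }

      // Analogous to Cell#getSerializedSize() in this patch: a constant-time field read.
      public int getSerializedSize() {
        return this.length;
      }

      // Analogous to Cell#heapSize(): an assumed fixed object overhead plus payload length.
      public long heapSize() {
        return 32L + this.length;
      }
    }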