From cd26dca62383f99efc20d882e213f52d76b3afae Mon Sep 17 00:00:00 2001 From: anastas Date: Sun, 30 Apr 2017 14:57:05 +0300 Subject: [PATCH] HBASE-16436 Adding CellChunkMap, tests and code review comments --- .../hadoop/hbase/regionserver/CellChunkMap.java | 126 +++++++++++++++++++++ .../hadoop/hbase/regionserver/TestCellFlatSet.java | 67 ++++++++++- 2 files changed, 190 insertions(+), 3 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java new file mode 100644 index 0000000..0a207a6 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java @@ -0,0 +1,126 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.regionserver; + +import java.nio.ByteBuffer; + +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ByteBufferUtils; + +import java.util.Comparator; + + +/** + * CellChunkMap is an array of serialized representations of Cell + * (pointing to Chunks with full Cell data) and can be allocated both off-heap and on-heap. + * + * CellChunkMap is a byte array (chunk) holding all that is needed to access a Cell, which + * is actually saved on another deeper chunk. + * Per Cell we have a reference to this deeper byte array B (chunk ID, integer), + * offset in bytes in B (integer), length in bytes in B (integer) and seqID of the cell (long). + * In order to save reference to byte array we use the Chunk's ID given by ChunkCreator. + * + * The CellChunkMap memory layout relevant to a deeper byte array B, holding the actual cell data: + * + * <----------------- first Cell --------------------------------------> <- second Cell ... + * --------------------------------------------------------------------------------------- ... + * | integer: 4 bytes | integer: 4 bytes | integer: 4 bytes | long: 8 bytes | + * | chunkID of chunk B| offset in B where | length of Cell's | sequence ID of | ... + * | holding Cell data | Cell's data starts| data in B | the Cell | + * --------------------------------------------------------------------------------------- ... 
+ */ +@InterfaceAudience.Private +public class CellChunkMap extends CellFlatMap { + + private final Chunk[] chunks; // the array of chunks, on which the index is based + private final int numOfCellsInsideChunk; // constant number of cell-representations in a chunk + private final ChunkCreator chunkCreator; // ChunkCreator for chunkID translation + + // each cell-representation requires three integers for chunkID (reference to the ByteBuffer), + // offset and length, and one long for seqID + public static final int SIZEOF_CELL_REP = 3*Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG ; + + /** + * C-tor for creating CellChunkMap from existing Chunk array, which must be ordered + * (decreasingly or increasingly according to parameter "descending") + * @param comparator a tool for comparing cells + * @param chunks ordered array of index chunks with cell representations + * @param min the index of the first cell (usually 0) + * @param max number of Cells or the index of the cell after the maximal cell + * @param descending the order of the given array + */ + public CellChunkMap(Comparator comparator, + Chunk[] chunks, int min, int max, boolean descending) { + super(comparator, min, max, descending); + this.chunkCreator = ChunkCreator.getInstance(); + this.chunks = chunks; + + this.numOfCellsInsideChunk = // each chunk starts with its own ID followed by the cells data + (chunkCreator.getChunkSize() - Bytes.SIZEOF_INT) / SIZEOF_CELL_REP; + + } + + /* To be used by base (CellFlatMap) class only to create a sub-CellFlatMap + * Should be used only to create CellChunkMap from CellChunkMap */ + @Override + protected CellFlatMap createSubCellFlatMap(int min, int max, boolean descending) { + return new CellChunkMap(this.comparator(), this.chunks, min, max, descending); + } + + + @Override + protected Cell getCell(int i) { + // get the index of the relevant chunk inside chunk array + int chunkIndex = (i / numOfCellsInsideChunk); + ByteBuffer block = chunks[chunkIndex].getData();// get the 
ByteBuffer of the relevant chunk + i = i - chunkIndex * numOfCellsInsideChunk; // get the index of the cell-representation + + // find the offset inside the chunk holding the index, skip bytes for chunk id + int offsetInBytes = Bytes.SIZEOF_INT + i* SIZEOF_CELL_REP; + + + // find the chunk holding the data of the cell, the chunkID is stored first + int chunkId = ByteBufferUtils.toInt(block, offsetInBytes); + Chunk chunk = chunkCreator.getChunk(chunkId); + if (chunk == null) { + // this should not happen, putting an assertion here at least for the testing period + assert false; + } + + // find the offset of the data of the cell, skip integer for chunkID, offset is stored second + int offsetOfCell = ByteBufferUtils.toInt(block, offsetInBytes + Bytes.SIZEOF_INT); + // find the length of the data of the cell, skip two integers for chunkID and offset, + // length is stored third + int lengthOfCell = ByteBufferUtils.toInt(block, offsetInBytes + 2*Bytes.SIZEOF_INT); + // find the seqID of the cell, skip three integers for chunkID, offset, and length + // the seqID is written plainly as part of the cell representation + long cellSeqID = ByteBufferUtils.toLong(block, offsetInBytes + 3*Bytes.SIZEOF_INT); + + ByteBuffer buf = chunk.getData(); // get the ByteBuffer where the cell data is stored + if (buf == null) { + // this should not happen, putting an assertion here at least for the testing period + assert false; + } + + return new ByteBufferChunkCell(buf, offsetOfCell, lengthOfCell, cellSeqID); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java index 09877b0..29ef729 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java @@ -18,6 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; + +import 
java.lang.management.ManagementFactory; + +import java.nio.ByteBuffer; import java.util.Iterator; import java.util.NavigableMap; import java.util.NavigableSet; @@ -28,13 +32,19 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; + +import org.apache.hadoop.hbase.io.util.MemorySizeUtil; + + import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.junit.Test; import org.junit.experimental.categories.Category; - +import static org.junit.Assert.assertTrue; @Category({RegionServerTests.class, SmallTests.class}) public class TestCellFlatSet extends TestCase { @@ -47,6 +57,11 @@ public class TestCellFlatSet extends TestCase { private KeyValue lowerOuterCell; private KeyValue upperOuterCell; + + private CellChunkMap ccm; // for testing CellChunkMap + private static ChunkCreator chunkCreator; + + @Override protected void setUp() throws Exception { super.setUp(); @@ -71,17 +86,38 @@ public class TestCellFlatSet extends TestCase { ascCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,ascCells,0,NUM_OF_CELLS,false); descCells = new Cell[] {kv4,kv3,kv2,kv1}; descCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,descCells,0,NUM_OF_CELLS,true); + CONF.setBoolean(MemStoreLAB.USEMSLAB_KEY, true); CONF.setFloat(MemStoreLAB.CHUNK_POOL_MAXSIZE_KEY, 0.2f); ChunkCreator.chunkPoolDisabled = false; + + + long globalMemStoreLimit = (long) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage() + .getMax() * MemorySizeUtil.getGlobalMemStoreHeapPercent(CONF, false)); + chunkCreator = ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, + globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null); + 
+ assertTrue(chunkCreator != null); + + ccm = setUpCellChunkMap(); } - /* Create and test CellSet based on CellArrayMap */ - public void testCellBlocksOnHeap() throws Exception { + /* Create and test ascending CellSet based on CellArrayMap */ + @Test + public void testCellArrayMapAsc() throws Exception { CellSet cs = new CellSet(ascCbOnHeap); testCellBlocks(cs); testIterators(cs); } + + /* Create and test ascending CellSet based on CellChunkMap */ + @Test + public void testCellChunkMapAsc() throws Exception { + CellSet cs = new CellSet(ccm); + testCellBlocks(cs); + testIterators(cs); + testSubSet(cs); + } + @Test public void testAsc() throws Exception { CellSet ascCs = new CellSet(ascCbOnHeap); @@ -200,4 +236,29 @@ public class TestCellFlatSet extends TestCase { } assertEquals(NUM_OF_CELLS, count); } + + /* Create CellChunkMap with four cells inside the index chunk */ + private CellChunkMap setUpCellChunkMap() { + + Chunk idxChunk = chunkCreator.getChunk(); + ByteBuffer idxBuffer = idxChunk.getData(); // index chunk buffer for cell-representations + // allocate new chunk and use its buffer to hold the full data of the cells + Chunk dataChunk = chunkCreator.getChunk(); + + ByteBuffer dataBuffer = dataChunk.getData(); + int offset = Bytes.SIZEOF_INT; // skip the space for chunk ID + int pos = offset; + + for (Cell kv: ascCells) { + KeyValueUtil.appendTo(kv, dataBuffer, offset, false); // write deep cell data + + pos = ByteBufferUtils.putInt(idxBuffer, pos, dataChunk.getId()); // write data chunk ID + pos = ByteBufferUtils.putInt(idxBuffer, pos, offset); // offset + pos = ByteBufferUtils.putInt(idxBuffer, pos, KeyValueUtil.length(kv)); // length + pos = ByteBufferUtils.putLong(idxBuffer, pos, kv.getSequenceId()); // seqID + offset += KeyValueUtil.length(kv); + } + Chunk [] chunkArray = {idxChunk}; + return new CellChunkMap(CellComparator.COMPARATOR,chunkArray,0,NUM_OF_CELLS,false); + } } -- 1.8.5.2 (Apple Git-48)