From 99f31d0a6e409151fe9612b3e47c1c404d7fff60 Mon Sep 17 00:00:00 2001 From: anastas Date: Sun, 14 May 2017 10:34:16 +0300 Subject: [PATCH] HBASE-16436 Adding CellChunkMap code, its tests and fixes to all code review comments --- .../hadoop/hbase/regionserver/CellChunkMap.java | 127 ++++++++++++++++++++ .../apache/hadoop/hbase/regionserver/CellSet.java | 3 + .../hadoop/hbase/regionserver/TestCellFlatSet.java | 128 ++++++++++++++++++++- 3 files changed, 253 insertions(+), 5 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java new file mode 100644 index 0000000..a965ade --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellChunkMap.java @@ -0,0 +1,127 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hbase.regionserver; + +import java.nio.ByteBuffer; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ByteBufferUtils; + +import java.util.Comparator; + + +/** + * CellChunkMap is an array of serialized representations of Cell + * (pointing to Chunks with full Cell data) and can be allocated both off-heap and on-heap. + * + * CellChunkMap is a byte array (chunk) holding all that is needed to access a Cell, which + * is actually saved on another deeper chunk. + * Per Cell we have a reference to this deeper byte array B (chunk ID, integer), + * offset in bytes in B (integer), length in bytes in B (integer) and seqID of the cell (long). + * In order to save reference to byte array we use the Chunk's ID given by ChunkCreator. + * + * The CellChunkMap memory layout on chunk A relevant to a deeper byte array B, + * holding the actual cell data: + * + * < header > <--------------- first Cell -----------------> <-- second Cell ... + * --------------------------------------------------------------------------------------- ... + * integer | integer | integer | integer | long | + * 4 bytes | 4 bytes | 4 bytes | 4 bytes | 8 bytes | + * ChunkID | chunkID of | offset in B | length of | sequence | ... + * of this | chunk B with | where Cell's | Cell's | ID of | + * chunk A | Cell data | data starts | data in B | the Cell | + * --------------------------------------------------------------------------------------- ... 
+ */ +@InterfaceAudience.Private +public class CellChunkMap extends CellFlatMap { + + private final Chunk[] chunks; // the array of chunks, on which the index is based + private final int numOfCellsInsideChunk; // constant number of cell-representations in a chunk + + // each cell-representation requires three integers for chunkID (reference to the ByteBuffer), + // offset and length, and one long for seqID + public static final int SIZEOF_CELL_REP = 3*Bytes.SIZEOF_INT + Bytes.SIZEOF_LONG ; + + /** + * C-tor for creating CellChunkMap from existing Chunk array, which must be ordered + * (decreasingly or increasingly according to parameter "descending") + * @param comparator a tool for comparing cells + * @param chunks ordered array of index chunk with cell representations + * @param min the index of the first cell (usually 0) + * @param max number of Cells or the index of the cell after the maximal cell + * @param descending the order of the given array + */ + public CellChunkMap(Comparator comparator, + Chunk[] chunks, int min, int max, boolean descending) { + super(comparator, min, max, descending); + this.chunks = chunks; + this.numOfCellsInsideChunk = // each chunk starts with its own ID followed by the cells data + (ChunkCreator.getInstance().getChunkSize() - Bytes.SIZEOF_INT) / SIZEOF_CELL_REP; + + } + + /* To be used by base (CellFlatMap) class only to create a sub-CellFlatMap + * Should be used only to create CellChunkMap from CellChunkMap */ + @Override + protected CellFlatMap createSubCellFlatMap(int min, int max, boolean descending) { + return new CellChunkMap(this.comparator(), this.chunks, min, max, descending); + } + + + @Override + protected Cell getCell(int i) { + // get the index of the relevant chunk inside chunk array + int chunkIndex = (i / numOfCellsInsideChunk); + ByteBuffer block = chunks[chunkIndex].getData();// get the ByteBuffer of the relevant chunk + int j = i - chunkIndex * numOfCellsInsideChunk; // get the index of the 
cell-representation + + // find inside the offset inside the chunk holding the index, skip bytes for chunk id + int offsetInBytes = Bytes.SIZEOF_INT + j* SIZEOF_CELL_REP; + + + // find the chunk holding the data of the cell, the chunkID is stored first + int chunkId = ByteBufferUtils.toInt(block, offsetInBytes); + Chunk chunk = ChunkCreator.getInstance().getChunk(chunkId); + if (chunk == null) { + // this should not happen, putting an assertion here at least for the testing period + assert false; + } + + // find the offset of the data of the cell, skip integer for chunkID, offset is stored second + int offsetOfCell = ByteBufferUtils.toInt(block, offsetInBytes + Bytes.SIZEOF_INT); + // find the length of the data of the cell, skip two integers for chunkID and offset, + // length is stored third + int lengthOfCell = ByteBufferUtils.toInt(block, offsetInBytes + 2*Bytes.SIZEOF_INT); + // find the seqID of the cell, skip three integers for chunkID, offset, and length + // the seqID is plain written as part of the cell representation + long cellSeqID = ByteBufferUtils.toLong(block, offsetInBytes + 3*Bytes.SIZEOF_INT); + + ByteBuffer buf = chunk.getData(); // get the ByteBuffer where the cell data is stored + if (buf == null) { + // this should not happen, putting an assertion here at least for the testing period + assert false; + } + + return new ByteBufferChunkCell(buf, offsetOfCell, lengthOfCell, cellSeqID); + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java index 9f08712..0a4ea9e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/CellSet.java @@ -126,6 +126,9 @@ public class CellSet implements NavigableSet { throw new UnsupportedOperationException("Not implemented"); } + // TODO: why do we have a double traversing through map? 
Recall we have Cell to Cell mapping... + // First for first/last key, which actually returns Cell and then get for the same Cell? + // TODO: Consider just return the first/lastKey(), should be twice more effective... public Cell first() { return this.delegatee.get(this.delegatee.firstKey()); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java index 09877b0..5872d69 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCellFlatSet.java @@ -18,6 +18,10 @@ */ package org.apache.hadoop.hbase.regionserver; + +import java.lang.management.ManagementFactory; + +import java.nio.ByteBuffer; import java.util.Iterator; import java.util.NavigableMap; import java.util.NavigableSet; @@ -28,16 +32,30 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; + +import org.apache.hadoop.hbase.io.util.MemorySizeUtil; + + import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; - +import static org.junit.Assert.assertTrue; @Category({RegionServerTests.class, SmallTests.class}) +@RunWith(Parameterized.class) public class TestCellFlatSet extends TestCase { + @Parameterized.Parameters + public static Object[] data() { + return new Object[] { "SMALL_CHUNKS", "NORMAL_CHUNKS" }; // test with different chunk sizes + } private static final int 
NUM_OF_CELLS = 4; private Cell ascCells[]; private CellArrayMap ascCbOnHeap; @@ -47,8 +65,33 @@ public class TestCellFlatSet extends TestCase { private KeyValue lowerOuterCell; private KeyValue upperOuterCell; + + private CellChunkMap ascCCM; // for testing ascending CellChunkMap with one chunk in array + private CellChunkMap descCCM; // for testing descending CellChunkMap with one chunk in array + private CellChunkMap ascMultCCM; // testing ascending CellChunkMap with multiple chunks in array + private CellChunkMap descMultCCM;// testing descending CellChunkMap with multiple chunks in array + private static ChunkCreator chunkCreator; + + + public TestCellFlatSet(String chunkType){ + long globalMemStoreLimit = (long) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage() + .getMax() * MemorySizeUtil.getGlobalMemStoreHeapPercent(CONF, false)); + if (chunkType == "NORMAL_CHUNKS") { + chunkCreator = ChunkCreator.initialize(MemStoreLABImpl.CHUNK_SIZE_DEFAULT, false, + globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null); + assertTrue(chunkCreator != null); + } else { + // chunkCreator with smaller chunk size, so only 3 cell-representations can accommodate a chunk + chunkCreator = ChunkCreator.initialize(64, false, + globalMemStoreLimit, 0.2f, MemStoreLAB.POOL_INITIAL_SIZE_DEFAULT, null); + assertTrue(chunkCreator != null); + + } + } + + @Before @Override - protected void setUp() throws Exception { + public void setUp() throws Exception { super.setUp(); // create array of Cells to bass to the CellFlatMap under CellSet @@ -71,17 +114,45 @@ public class TestCellFlatSet extends TestCase { ascCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,ascCells,0,NUM_OF_CELLS,false); descCells = new Cell[] {kv4,kv3,kv2,kv1}; descCbOnHeap = new CellArrayMap(CellComparator.COMPARATOR,descCells,0,NUM_OF_CELLS,true); + CONF.setBoolean(MemStoreLAB.USEMSLAB_KEY, true); CONF.setFloat(MemStoreLAB.CHUNK_POOL_MAXSIZE_KEY, 0.2f); ChunkCreator.chunkPoolDisabled = false; 
+ + // create ascending and descending CellChunkMaps + // according to parameter, once built with normal chunks and at second with small chunks + ascCCM = setUpCellChunkMap(true); + descCCM = setUpCellChunkMap(false); + + +// ascMultCCM = setUpCellChunkMap(true); +// descMultCCM = setUpCellChunkMap(false); } - /* Create and test CellSet based on CellArrayMap */ - public void testCellBlocksOnHeap() throws Exception { + /* Create and test ascending CellSet based on CellArrayMap */ + @Test + public void testCellArrayMapAsc() throws Exception { CellSet cs = new CellSet(ascCbOnHeap); testCellBlocks(cs); testIterators(cs); } + + /* Create and test ascending and descending CellSet based on CellChunkMap */ + @Test + public void testCellChunkMap() throws Exception { + CellSet cs = new CellSet(ascCCM); + testCellBlocks(cs); + testIterators(cs); + testSubSet(cs); + cs = new CellSet(descCCM); + testSubSet(cs); +// cs = new CellSet(ascMultCCM); +// testCellBlocks(cs); +// testSubSet(cs); +// cs = new CellSet(descMultCCM); +// testSubSet(cs); + } + @Test public void testAsc() throws Exception { CellSet ascCs = new CellSet(ascCbOnHeap); @@ -148,7 +219,7 @@ public class TestCellFlatSet extends TestCase { assertEquals(NUM_OF_CELLS, cs.size()); // check size assertFalse(cs.contains(outerCell)); // check outer cell - assertTrue(cs.contains(ascCells[0])); // check existence of the first + assertTrue(cs.contains(ascCells[0])); // check existence of the first Cell first = cs.first(); assertTrue(ascCells[0].equals(first)); @@ -200,4 +271,51 @@ public class TestCellFlatSet extends TestCase { } assertEquals(NUM_OF_CELLS, count); } + + /* Create CellChunkMap with four cells inside the index chunk */ + private CellChunkMap setUpCellChunkMap(boolean asc) { + + // allocate new chunks and use the data chunk to hold the full data of the cells + // and the index chunk to hold the cell-representations + Chunk dataChunk = chunkCreator.getChunk(); + Chunk idxChunk = chunkCreator.getChunk(); + // the 
array of index chunks to be used as a basis for CellChunkMap + Chunk chunkArray[] = new Chunk[8]; // according to test currently written 8 is way enough + int chunkArrayIdx = 0; + chunkArray[chunkArrayIdx++] = idxChunk; + + ByteBuffer idxBuffer = idxChunk.getData(); // the buffers of the chunks + ByteBuffer dataBuffer = dataChunk.getData(); + int dataOffset = Bytes.SIZEOF_INT; // offset inside data buffer + int idxOffset = Bytes.SIZEOF_INT; // skip the space for chunk ID + + Cell[] cellArray = asc ? ascCells : descCells; + + for (Cell kv: cellArray) { + // do we have enough space to write the cell data on the data chunk? + if (dataOffset + KeyValueUtil.length(kv) > chunkCreator.getChunkSize()) { + dataChunk = chunkCreator.getChunk(); // allocate more data chunks if needed + dataBuffer = dataChunk.getData(); + dataOffset = Bytes.SIZEOF_INT; + } + int dataStartOfset = dataOffset; + dataOffset = KeyValueUtil.appendTo(kv, dataBuffer, dataOffset, false); // write deep cell data + + // do we have enough space to write the cell-representation on the index chunk? + if (idxOffset + CellChunkMap.SIZEOF_CELL_REP > chunkCreator.getChunkSize()) { + idxChunk = chunkCreator.getChunk(); // allocate more index chunks if needed + idxBuffer = idxChunk.getData(); + idxOffset = Bytes.SIZEOF_INT; + chunkArray[chunkArrayIdx++] = idxChunk; + } + idxOffset = ByteBufferUtils.putInt(idxBuffer, idxOffset, dataChunk.getId()); // write data chunk id + idxOffset = ByteBufferUtils.putInt(idxBuffer, idxOffset, dataStartOfset); // offset + idxOffset = ByteBufferUtils.putInt(idxBuffer, idxOffset, KeyValueUtil.length(kv)); // length + idxOffset = ByteBufferUtils.putLong(idxBuffer, idxOffset, kv.getSequenceId()); // seqId + } + + return asc ? + new CellChunkMap(CellComparator.COMPARATOR,chunkArray,0,NUM_OF_CELLS,false) : + new CellChunkMap(CellComparator.COMPARATOR,chunkArray,0,NUM_OF_CELLS,true); + } } -- 1.8.5.2 (Apple Git-48)