.../java/org/apache/hadoop/hbase/util/Bytes.java | 43 +- .../hadoop/hbase/io/HalfStoreFileReader.java | 10 +- .../org/apache/hadoop/hbase/io/hfile/HFile.java | 4 +- .../hadoop/hbase/io/hfile/HFileBlockIndex.java | 588 ++++++++++++++------- .../hadoop/hbase/io/hfile/HFilePrettyPrinter.java | 2 +- .../hadoop/hbase/io/hfile/HFileReaderImpl.java | 21 +- .../hbase/regionserver/HRegionFileSystem.java | 5 +- .../apache/hadoop/hbase/regionserver/HStore.java | 4 +- .../hadoop/hbase/regionserver/StoreFile.java | 18 +- .../hadoop/hbase/util/CompoundBloomFilter.java | 6 +- .../org/apache/hadoop/hbase/util/HBaseFsck.java | 8 +- .../hadoop/hbase/io/TestHalfStoreFileReader.java | 12 +- .../hadoop/hbase/io/hfile/TestHFileBlockIndex.java | 13 +- .../hadoop/hbase/io/hfile/TestHFileSeek.java | 2 +- .../hadoop/hbase/io/hfile/TestHFileWriterV2.java | 7 +- .../hadoop/hbase/io/hfile/TestHFileWriterV3.java | 7 +- .../hadoop/hbase/regionserver/TestStoreFile.java | 10 +- 17 files changed, 464 insertions(+), 296 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index 7d678fd..0fe035c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -2075,24 +2075,6 @@ public class Bytes implements Comparable { } /** - * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR - * - * @param arr array of byte arrays to search for - * @param key the key you want to find - * @param offset the offset in the key you want to find - * @param length the length of the key - * @return zero-based index of the key, if the key is present in the array. - * Otherwise, a value -(i + 1) such that the key is between arr[i - - * 1] and arr[i] non-inclusively, where i is in [0, i], if we define - * arr[-1] = -Inf and arr[N] = Inf for an N-element array. The above - * means that this function can return 2N + 1 different values - * ranging from -(N + 1) to N - 1. - */ - public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) { - return binarySearch(arr, key, offset, length, (CellComparator) null); - } - - /** * Binary search for keys in indexes. * * @param arr array of byte arrays to search for @@ -2111,11 +2093,11 @@ public class Bytes implements Comparable { @Deprecated public static int binarySearch(byte [][]arr, byte []key, int offset, int length, RawComparator comparator) { - return binarySearch(arr, key, offset, length, (CellComparator)null); + return binarySearch(arr, key, offset, length); } /** - * Binary search for keys in indexes. + * Binary search for keys in indexes using Bytes.BYTES_RAWCOMPARATOR. * * @param arr array of byte arrays to search for * @param key the key you want to find @@ -2129,23 +2111,18 @@ public class Bytes implements Comparable { * means that this function can return 2N + 1 different values * ranging from -(N + 1) to N - 1. */ - public static int binarySearch(byte [][]arr, byte []key, int offset, - int length, CellComparator comparator) { + public static int binarySearch(byte[][] arr, byte[] key, int offset, int length) { int low = 0; int high = arr.length - 1; KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); r.setKey(key, offset, length); while (low <= high) { - int mid = (low+high) >>> 1; + int mid = (low + high) >>> 1; // we have to compare in this order, because the comparator order // has special logic when the 'left side' is a special key. 
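The consolidated raw-byte overload keeps the classic insertion-point contract spelled out in the javadoc: an exact hit returns the index, a miss returns -(i + 1) where i is the insertion point, giving 2N + 1 distinct outcomes for N keys. A minimal standalone restatement of that contract, plain JDK only (Java 9+ for Arrays.compareUnsigned, which matches BYTES_RAWCOMPARATOR's unsigned lexicographic order; the sample keys are made up):

```java
import java.util.Arrays;

public class BinarySearchContractDemo {
  static int search(byte[][] arr, byte[] key) {
    int low = 0, high = arr.length - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1; // unsigned shift avoids int overflow
      int cmp = Arrays.compareUnsigned(key, arr[mid]); // unsigned byte order
      if (cmp > 0) low = mid + 1;
      else if (cmp < 0) high = mid - 1;
      else return mid;
    }
    return -(low + 1); // encode the insertion point
  }

  public static void main(String[] args) {
    byte[][] keys = { { 10 }, { 20 }, { 30 } };
    System.out.println(search(keys, new byte[] { 20 })); // 1: exact match
    System.out.println(search(keys, new byte[] { 25 })); // -3: between keys[1] and keys[2]
    System.out.println(search(keys, new byte[] { 5 }));  // -1: before keys[0]
  }
}
```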
- int cmp = 0; - if (comparator != null) { - cmp = comparator.compare(r, arr[mid], 0, arr[mid].length); - } else { - cmp = Bytes.BYTES_RAWCOMPARATOR.compare(key, offset, length, arr[mid], 0, arr[mid].length); - } + int cmp = Bytes.BYTES_RAWCOMPARATOR + .compare(key, offset, length, arr[mid], 0, arr[mid].length); // key lives above the midpoint if (cmp > 0) low = mid + 1; @@ -2156,7 +2133,7 @@ public class Bytes implements Comparable { else return mid; } - return - (low+1); + return -(low + 1); } /** @@ -2212,16 +2189,14 @@ public class Bytes implements Comparable { * ranging from -(N + 1) to N - 1. * @return the index of the block */ - public static int binarySearch(byte[][] arr, Cell key, Comparator comparator) { + public static int binarySearch(Cell[] arr, Cell key, Comparator comparator) { int low = 0; int high = arr.length - 1; - KeyValue.KeyOnlyKeyValue r = new KeyValue.KeyOnlyKeyValue(); while (low <= high) { int mid = (low+high) >>> 1; // we have to compare in this order, because the comparator order // has special logic when the 'left side' is a special key. - r.setKey(arr[mid], 0, arr[mid].length); - int cmp = comparator.compare(key, r); + int cmp = comparator.compare(key, arr[mid]); // key lives above the midpoint if (cmp > 0) low = mid + 1; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java index a95da7b..420e398 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/HalfStoreFileReader.java @@ -59,7 +59,7 @@ public class HalfStoreFileReader extends StoreFile.Reader { protected final Cell splitCell; - private byte[] firstKey = null; + private Cell firstKey = null; private boolean firstKeySeeked = false; @@ -262,7 +262,7 @@ public class HalfStoreFileReader extends StoreFile.Reader { @Override public boolean seekBefore(Cell key) throws IOException { if (top) { - Cell fk = new KeyValue.KeyOnlyKeyValue(getFirstKey(), 0, getFirstKey().length); + Cell fk = getFirstKey(); if (getComparator().compareKeyIgnoresMvcc(key, fk) <= 0) { return false; } @@ -314,18 +314,18 @@ public class HalfStoreFileReader extends StoreFile.Reader { } @Override - public byte[] midkey() throws IOException { + public Cell midkey() throws IOException { // Returns null to indicate file is not splitable. 
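With the new Cell[] overload, a caller wraps each serialized block key in a KeyOnlyKeyValue once, instead of allocating a fresh wrapper on every probe the way the removed byte[][] + CellComparator path did. A usage sketch, assuming the serialized keys are in the flat KeyValue key format the index stores (findBlock and serializedKeys are illustrative names, not part of the patch):

```java
import java.util.Comparator;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class CellIndexSearch {
  // Wrap the serialized keys up front, then reuse the Cell[] for every lookup.
  static int findBlock(byte[][] serializedKeys, Cell probe, Comparator<Cell> comp) {
    Cell[] blockKeys = new Cell[serializedKeys.length];
    for (int i = 0; i < serializedKeys.length; i++) {
      // One-time wrapping; the point of the patch is to stop doing this per probe.
      blockKeys[i] = new KeyValue.KeyOnlyKeyValue(serializedKeys[i]);
    }
    // Same -(i + 1) miss encoding as the raw-byte overload.
    return Bytes.binarySearch(blockKeys, probe, comp);
  }
}
```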
return null; } @Override - public byte[] getFirstKey() { + public Cell getFirstKey() { if (!firstKeySeeked) { HFileScanner scanner = getScanner(true, true, false); try { if (scanner.seekTo()) { - this.firstKey = Bytes.toBytes(scanner.getKey()); + this.firstKey = new KeyValue.KeyOnlyKeyValue(Bytes.toBytes(scanner.getKey())); } firstKeySeeked = true; } catch (IOException e) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java index 6c8260d..35458a2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java @@ -394,13 +394,13 @@ public class HFile { byte[] getLastKey(); - byte[] midkey() throws IOException; + Cell midkey() throws IOException; long length(); long getEntries(); - byte[] getFirstKey(); + Cell getFirstKey(); long indexSize(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java index e6e1fff..33ce5d5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java @@ -37,6 +37,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueUtil; @@ -96,124 +97,205 @@ public class HFileBlockIndex { 2 * Bytes.SIZEOF_INT; /** - * The reader will always hold the root level index in the memory. Index - * blocks at all other levels will be cached in the LRU cache in practice, - * although this API does not enforce that. - * - * All non-root (leaf and intermediate) index blocks contain what we call a - * "secondary index": an array of offsets to the entries within the block. - * This allows us to do binary search for the entry corresponding to the - * given key without having to deserialize the block. + * An implementation of the BlockIndexReader that deals with block keys which are plain + * byte[] like MetaBlock or the Bloom Block for ROW bloom. + * Does not need a comparator. It can work on Bytes.BYTES_RAWCOMPARATOR */ - public static class BlockIndexReader implements HeapSize { - /** Needed doing lookup on blocks. */ - private final CellComparator comparator; + public static class ByteArrayKeyBlockIndexReader extends BlockIndexReader { - // Root-level data. - // TODO : Convert these to Cells (ie) KeyValue.KeyOnlyKV private byte[][] blockKeys; - private long[] blockOffsets; - private int[] blockDataSizes; - private int rootCount = 0; - - // Mid-key metadata. - private long midLeafBlockOffset = -1; - private int midLeafBlockOnDiskSize = -1; - private int midKeyEntry = -1; - /** Pre-computed mid-key */ - private AtomicReference midKey = new AtomicReference(); + public ByteArrayKeyBlockIndexReader(final CellComparator c, final int treeLevel, + final CachingBlockReader cachingBlockReader) { + super(c, treeLevel, cachingBlockReader); + } - /** - * The number of levels in the block index tree. One if there is only root - * level, two for root and leaf levels, etc. 
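HFile.Reader.getFirstKey() and midkey() now hand back a Cell instead of a flat byte[], so callers pull what they need out of the Cell rather than re-parsing a serialized key. A migration sketch for the common "first row" case, keeping the null guard for empty files that the byte[] version had (firstRow is a hypothetical helper, not part of the patch):

```java
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.Bytes;

public class FirstKeyMigration {
  // Before: byte[] fk = reader.getFirstKey();
  //         row = KeyValueUtil.createKeyValueFromKey(fk).getRow();
  // After: copy the row slice straight out of the returned Cell, as
  // HFileReaderImpl.getFirstRowKey() does further down in this patch.
  static byte[] firstRow(HFile.Reader reader) {
    Cell firstKey = reader.getFirstKey();
    if (firstKey == null) {
      return null; // empty store file: block index has no entries
    }
    return Bytes.copy(firstKey.getRowArray(), firstKey.getRowOffset(), firstKey.getRowLength());
  }
}
```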
- */ - private int searchTreeLevel; + public ByteArrayKeyBlockIndexReader(final CellComparator c, final int treeLevel) { + super(c, treeLevel); + } - /** A way to read {@link HFile} blocks at a given offset */ - private CachingBlockReader cachingBlockReader; + protected long calculateHeapSizeForBlockKeys(long heapSize) { + // Calculating the size of blockKeys + if (blockKeys != null) { + heapSize += ClassSize.REFERENCE; + // Adding array + references overhead + heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE); - public BlockIndexReader(final CellComparator c, final int treeLevel, - final CachingBlockReader cachingBlockReader) { - this(c, treeLevel); - this.cachingBlockReader = cachingBlockReader; + // Adding bytes + for (byte[] key : blockKeys) { + heapSize += ClassSize.align(ClassSize.ARRAY + key.length); + } + } + return heapSize; } - public BlockIndexReader(final CellComparator c, final int treeLevel) - { - // Can be null for METAINDEX block - comparator = c; - searchTreeLevel = treeLevel; + @Override + public void ensureNonEmpty() { + if (blockKeys.length == 0) { + throw new IllegalStateException("Block index is empty or not loaded"); + } } - /** - * @return true if the block index is empty. - */ + @Override public boolean isEmpty() { return blockKeys.length == 0; } /** - * Verifies that the block index is non-empty and throws an - * {@link IllegalStateException} otherwise. + * @param i + * from 0 to {@link #getRootBlockCount() - 1} */ + public byte[] getRootBlockKey(int i) { + return blockKeys[i]; + } + + @Override + public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock, + boolean cacheBlocks, boolean pread, boolean isCompaction, + DataBlockEncoding expectedDataBlockEncoding) throws IOException { + // this would not be needed + return null; + } + + @Override + public Cell midkey() throws IOException { + // Not needed here + return null; + } + + @Override + public void readRootIndex(DataInput in, int numEntries) throws IOException { + blockOffsets = new long[numEntries]; + blockKeys = new byte[numEntries][]; + blockDataSizes = new int[numEntries]; + + // If index size is zero, no index was written. + if (numEntries > 0) { + for (int i = 0; i < numEntries; ++i) { + long offset = in.readLong(); + int dataSize = in.readInt(); + byte[] key = Bytes.readByteArray(in); + add(key, offset, dataSize); + } + } + + } + + private void add(final byte[] key, final long offset, final int dataSize) { + blockOffsets[rootCount] = offset; + blockKeys[rootCount] = key; + blockDataSizes[rootCount] = dataSize; + rootCount++; + } + + @Override + public int rootBlockContainingKey(byte[] key, int offset, int length, CellComparator comp) { + int pos = Bytes.binarySearch(blockKeys, key, offset, length); + return returnFinalIndex(pos); + } + + @Override + public int rootBlockContainingKey(Cell key) { + // Should not be called on this because here it deals only with byte[] + return -1; + } + + protected int returnFinalIndex(int pos) { + // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see + // binarySearch's javadoc. + + if (pos >= 0) { + // This means this is an exact match with an element of blockKeys. + assert pos < blockKeys.length; + return pos; + } + + // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i], + // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that + // blockKeys[j] <= key < blockKeys[j + 1]. 
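The readRootIndex() implementations in both subclasses consume one (long offset, int on-disk size, vint-length-prefixed key) triple per root entry. A round-trip sketch of that entry layout using the same Bytes helpers (the offset, size, and key values here are made up):

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.hbase.util.Bytes;

public class RootIndexRoundTrip {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bos);
    out.writeLong(4096L);                                 // block offset in the file
    out.writeInt(512);                                    // on-disk data size
    Bytes.writeByteArray(out, Bytes.toBytes("row-0042")); // vint-prefixed key bytes

    DataInputStream in = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
    long offset = in.readLong();
    int dataSize = in.readInt();
    byte[] key = Bytes.readByteArray(in);                 // symmetric with writeByteArray
    System.out.println(offset + " " + dataSize + " " + Bytes.toString(key));
  }
}
```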
In particular, j = -1 if + // key < blockKeys[0], meaning the file does not contain the given key. + + int i = -pos - 1; + assert 0 <= i && i <= blockKeys.length; + return i - 1; + } + + @Override + public DataInputStream readRootIndex(HFileBlock blk, int numEntries) throws IOException { + DataInputStream in = blk.getByteStream(); + readRootIndex(in, numEntries); + return in; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("size=" + rootCount).append("\n"); + for (int i = 0; i < rootCount; i++) { + sb.append("key=").append(KeyValue.keyToString(blockKeys[i])) + .append("\n offset=").append(blockOffsets[i]) + .append(", dataSize=" + blockDataSizes[i]).append("\n"); + } + return sb.toString(); + } + + } + + /** + * An implementation of the BlockIndexReader that deals with block keys which are KeyValue's + * key byte[] like the Data block index or the ROW_COL bloom blocks + * This needs a comparator to work with the Cells + */ + public static class CellBasedKeyBlockIndexReader extends BlockIndexReader { + + private Cell[] blockKeys; + + public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel, + final CachingBlockReader cachingBlockReader) { + super(c, treeLevel, cachingBlockReader); + } + + public CellBasedKeyBlockIndexReader(final CellComparator c, final int treeLevel) { + super(c, treeLevel); + } + + protected long calculateHeapSizeForBlockKeys(long heapSize) { + if (blockKeys != null) { + heapSize += ClassSize.REFERENCE; + // Adding array + references overhead + heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length * ClassSize.REFERENCE); + + // Adding blockKeys + for (Cell key : blockKeys) { + heapSize += ClassSize.align(CellUtil.estimatedHeapSizeOf(key)); + } + } + return heapSize; + } + + @Override public void ensureNonEmpty() { if (blockKeys.length == 0) { throw new IllegalStateException("Block index is empty or not loaded"); } } - /** - * Return the data block which contains this key. This function will only - * be called when the HFile version is larger than 1. - * - * @param key the key we are looking for - * @param currentBlock the current block, to avoid re-reading the same block - * @param cacheBlocks - * @param pread - * @param isCompaction - * @param expectedDataBlockEncoding the data block encoding the caller is - * expecting the data block to be in, or null to not perform this - * check and return the block irrespective of the encoding - * @return reader a basic way to load blocks - * @throws IOException - */ - public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks, - boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding) - throws IOException { - BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock, - cacheBlocks, - pread, isCompaction, expectedDataBlockEncoding); - if (blockWithScanInfo == null) { - return null; - } else { - return blockWithScanInfo.getHFileBlock(); - } + @Override + public boolean isEmpty() { + return blockKeys.length == 0; } /** - * Return the BlockWithScanInfo which contains the DataBlock with other scan - * info such as nextIndexedKey. This function will only be called when the - * HFile version is larger than 1. 
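returnFinalIndex(), duplicated in both subclasses, turns a binary-search result into "the index of the block that could contain the key". A standalone restatement with the three interesting cases (toBlockIndex is an illustrative name):

```java
public class ReturnFinalIndexDemo {
  // pos comes from a binarySearch following the -(i + 1) convention and is in
  // [-(n + 1), n - 1] for n block keys.
  static int toBlockIndex(int pos) {
    if (pos >= 0) {
      return pos;     // exact hit on a block key
    }
    int i = -pos - 1; // insertion point: blockKeys[i - 1] < key < blockKeys[i]
    return i - 1;     // block whose key is <= search key; -1 means "before all blocks"
  }

  public static void main(String[] args) {
    System.out.println(toBlockIndex(1));  // 1: key equals blockKeys[1]
    System.out.println(toBlockIndex(-3)); // 1: key falls inside block 1's range
    System.out.println(toBlockIndex(-1)); // -1: key sorts before the whole file
  }
}
```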
- * - * @param key - * the key we are looking for - * @param currentBlock - * the current block, to avoid re-reading the same block - * @param cacheBlocks - * @param pread - * @param isCompaction - * @param expectedDataBlockEncoding the data block encoding the caller is - * expecting the data block to be in, or null to not perform this - * check and return the block irrespective of the encoding. - * @return the BlockWithScanInfo which contains the DataBlock with other - * scan info such as nextIndexedKey. - * @throws IOException + * @param i + * from 0 to {@link #getRootBlockCount() - 1} */ + public Cell getRootBlockKey(int i) { + return blockKeys[i]; + } + @Override public BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock, - boolean cacheBlocks, - boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding) - throws IOException { + boolean cacheBlocks, boolean pread, boolean isCompaction, + DataBlockEncoding expectedDataBlockEncoding) throws IOException { int rootLevelIndex = rootBlockContainingKey(key); if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) { return null; @@ -227,7 +309,7 @@ public class HFileBlockIndex { int currentOnDiskSize = blockDataSizes[rootLevelIndex]; if (rootLevelIndex < blockKeys.length - 1) { - nextIndexedKey = new KeyValue.KeyOnlyKeyValue(blockKeys[rootLevelIndex + 1]); + nextIndexedKey = blockKeys[rootLevelIndex + 1]; } else { nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY; } @@ -314,18 +396,12 @@ public class HFileBlockIndex { return blockWithScanInfo; } - /** - * An approximation to the {@link HFile}'s mid-key. Operates on block - * boundaries, and does not go inside blocks. In other words, returns the - * first key of the middle block of the file. - * - * @return the first key of the middle block - */ - public byte[] midkey() throws IOException { + @Override + public Cell midkey() throws IOException { if (rootCount == 0) throw new IOException("HFile empty"); - byte[] targetMidKey = this.midKey.get(); + Cell targetMidKey = this.midKey.get(); if (targetMidKey != null) { return targetMidKey; } @@ -348,7 +424,8 @@ public class HFileBlockIndex { keyRelOffset; int keyOffset = Bytes.SIZEOF_INT * (numDataBlocks + 2) + keyRelOffset + SECONDARY_INDEX_ENTRY_OVERHEAD; - targetMidKey = ByteBufferUtils.toBytes(b, keyOffset, keyLen); + byte[] bytes = ByteBufferUtils.toBytes(b, keyOffset, keyLen); + targetMidKey = new KeyValue.KeyOnlyKeyValue(bytes, 0, bytes.length); } else { // The middle of the root-level index. targetMidKey = blockKeys[rootCount / 2]; @@ -358,14 +435,210 @@ public class HFileBlockIndex { return targetMidKey; } + @Override + public void readRootIndex(DataInput in, int numEntries) throws IOException { + blockOffsets = new long[numEntries]; + blockKeys = new Cell[numEntries]; + blockDataSizes = new int[numEntries]; + + // If index size is zero, no index was written. + if (numEntries > 0) { + for (int i = 0; i < numEntries; ++i) { + long offset = in.readLong(); + int dataSize = in.readInt(); + byte[] key = Bytes.readByteArray(in); + add(key, offset, dataSize); + } + } + + } + /** - * @param i from 0 to {@link #getRootBlockCount() - 1} + * Adds a new entry in the root block index. Only used when reading. 
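midkey() memoizes its result in an AtomicReference: concurrent readers may race to compute it, but the Cell is immutable, so either winner is correct and no lock is ever taken. The pattern in isolation (a generic sketch, not the actual reader code):

```java
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Supplier;

public class MidKeyCache<T> {
  private final AtomicReference<T> cached = new AtomicReference<>();

  T get(Supplier<T> expensiveCompute) {
    T v = cached.get();
    if (v != null) {
      return v; // fast path: someone already computed it
    }
    v = expensiveCompute.get(); // e.g. fetch and parse the middle leaf block
    cached.set(v);              // racing threads may both compute; last write wins harmlessly
    return v;
  }
}
```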
+ * + * @param key Last key in the block + * @param offset file offset where the block is stored + * @param dataSize the uncompressed data size */ - public byte[] getRootBlockKey(int i) { - return blockKeys[i]; + private void add(final byte[] key, final long offset, final int dataSize) { + blockOffsets[rootCount] = offset; + // Create the blockKeys as Cells once when the reader is opened + blockKeys[rootCount] = new KeyValue.KeyOnlyKeyValue(key, 0, key.length); + blockDataSizes[rootCount] = dataSize; + rootCount++; + } + @Override + public int rootBlockContainingKey(final byte[] key, int offset, int length, + CellComparator comp) { + // This should always be called with Cell not with a byte[] key + return -1; + } + + @Override + public int rootBlockContainingKey(Cell key) { + // Here the comparator should not be null as this happens for the root-level block + int pos = Bytes.binarySearch(blockKeys, key, comparator); + return returnFinalIndex(pos); + } + + protected int returnFinalIndex(int pos) { + // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see + // binarySearch's javadoc. + + if (pos >= 0) { + // This means this is an exact match with an element of blockKeys. + assert pos < blockKeys.length; + return pos; + } + + // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i], + // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that + // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if + // key < blockKeys[0], meaning the file does not contain the given key. + + int i = -pos - 1; + assert 0 <= i && i <= blockKeys.length; + return i - 1; + } + + @Override + public DataInputStream readRootIndex(HFileBlock blk, int numEntries) throws IOException { + DataInputStream in = blk.getByteStream(); + readRootIndex(in, numEntries); + return in; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("size=" + rootCount).append("\n"); + for (int i = 0; i < rootCount; i++) { + sb.append("key=").append((blockKeys[i])) + .append("\n offset=").append(blockOffsets[i]) + .append(", dataSize=" + blockDataSizes[i]).append("\n"); + } + return sb.toString(); + } + } + /** + * The reader will always hold the root level index in the memory. Index + * blocks at all other levels will be cached in the LRU cache in practice, + * although this API does not enforce that. + * + * All non-root (leaf and intermediate) index blocks contain what we call a + * "secondary index": an array of offsets to the entries within the block. + * This allows us to do binary search for the entry corresponding to the + * given key without having to deserialize the block. + */ + public static abstract class BlockIndexReader implements HeapSize { + /** Needed doing lookup on blocks. */ + protected CellComparator comparator; + + protected long[] blockOffsets; + protected int[] blockDataSizes; + protected int rootCount = 0; + + // Mid-key metadata. + protected long midLeafBlockOffset = -1; + protected int midLeafBlockOnDiskSize = -1; + protected int midKeyEntry = -1; + + /** Pre-computed mid-key */ + protected AtomicReference midKey = new AtomicReference(); + + /** + * The number of levels in the block index tree. One if there is only root + * level, two for root and leaf levels, etc. + */ + protected int searchTreeLevel; + + /** A way to read {@link HFile} blocks at a given offset */ + protected CachingBlockReader cachingBlockReader; + + /** + * @return true if the block index is empty. 
+ */ + public abstract boolean isEmpty(); + + /** + * Verifies that the block index is non-empty and throws an + * {@link IllegalStateException} otherwise. + */ + public abstract void ensureNonEmpty(); + + public BlockIndexReader(final CellComparator c, final int treeLevel, + final CachingBlockReader cachingBlockReader) { + this(c, treeLevel); + this.cachingBlockReader = cachingBlockReader; + } + + public BlockIndexReader(final CellComparator c, final int treeLevel) { + // Can be null for METAINDEX block + comparator = c; + searchTreeLevel = treeLevel; + } + /** + * Return the data block which contains this key. This function will only + * be called when the HFile version is larger than 1. + * + * @param key the key we are looking for + * @param currentBlock the current block, to avoid re-reading the same block + * @param cacheBlocks + * @param pread + * @param isCompaction + * @param expectedDataBlockEncoding the data block encoding the caller is + * expecting the data block to be in, or null to not perform this + * check and return the block irrespective of the encoding + * @return reader a basic way to load blocks + * @throws IOException + */ + public HFileBlock seekToDataBlock(final Cell key, HFileBlock currentBlock, boolean cacheBlocks, + boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding) + throws IOException { + BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, currentBlock, + cacheBlocks, + pread, isCompaction, expectedDataBlockEncoding); + if (blockWithScanInfo == null) { + return null; + } else { + return blockWithScanInfo.getHFileBlock(); + } } /** + * Return the BlockWithScanInfo which contains the DataBlock with other scan + * info such as nextIndexedKey. This function will only be called when the + * HFile version is larger than 1. + * + * @param key + * the key we are looking for + * @param currentBlock + * the current block, to avoid re-reading the same block + * @param cacheBlocks + * @param pread + * @param isCompaction + * @param expectedDataBlockEncoding the data block encoding the caller is + * expecting the data block to be in, or null to not perform this + * check and return the block irrespective of the encoding. + * @return the BlockWithScanInfo which contains the DataBlock with other + * scan info such as nextIndexedKey. + * @throws IOException + */ + public abstract BlockWithScanInfo loadDataBlockWithScanInfo(Cell key, HFileBlock currentBlock, + boolean cacheBlocks, + boolean pread, boolean isCompaction, DataBlockEncoding expectedDataBlockEncoding) + throws IOException; + + /** + * An approximation to the {@link HFile}'s mid-key. Operates on block + * boundaries, and does not go inside blocks. In other words, returns the + * first key of the middle block of the file. + * + * @return the first key of the middle block + */ + public abstract Cell midkey() throws IOException; + + /** * @param i from 0 to {@link #getRootBlockCount() - 1} */ public long getRootBlockOffset(int i) { @@ -402,27 +675,8 @@ public class HFileBlockIndex { // When we want to find the meta index block or bloom block for ROW bloom // type Bytes.BYTES_RAWCOMPARATOR would be enough. For the ROW_COL bloom case we need the // CellComparator. - public int rootBlockContainingKey(final byte[] key, int offset, int length, - CellComparator comp) { - int pos = Bytes.binarySearch(blockKeys, key, offset, length, comp); - // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see - // binarySearch's javadoc. 
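seekToDataBlock() stays concrete in the now-abstract base class: it funnels every lookup through the subclass's loadDataBlockWithScanInfo() and merely unwraps the result, a small template-method arrangement. A skeleton of that shape (all type and method names here are illustrative, not HBase's):

```java
import java.io.IOException;

// Base class owns the trivial unwrapping; subclasses own the actual lookup.
abstract class IndexLookup {
  static final class Block {}
  static final class BlockWithScanInfo {
    final Block block = new Block();
  }

  final Block seekToBlock(byte[] key) throws IOException {
    BlockWithScanInfo bwi = loadBlockWithScanInfo(key); // subclass-specific work
    return bwi == null ? null : bwi.block;              // base class only unwraps
  }

  protected abstract BlockWithScanInfo loadBlockWithScanInfo(byte[] key) throws IOException;
}
```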
- - if (pos >= 0) { - // This means this is an exact match with an element of blockKeys. - assert pos < blockKeys.length; - return pos; - } - - // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i], - // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that - // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if - // key < blockKeys[0], meaning the file does not contain the given key. - - int i = -pos - 1; - assert 0 <= i && i <= blockKeys.length; - return i - 1; - } + public abstract int rootBlockContainingKey(final byte[] key, int offset, int length, + CellComparator comp); /** * Finds the root-level index block containing the given key. @@ -447,41 +701,7 @@ public class HFileBlockIndex { * @param key * Key to find */ - public int rootBlockContainingKey(final Cell key) { - // Here the comparator should not be null as this happens for the root-level block - int pos = Bytes.binarySearch(blockKeys, key, comparator); - // pos is between -(blockKeys.length + 1) to blockKeys.length - 1, see - // binarySearch's javadoc. - - if (pos >= 0) { - // This means this is an exact match with an element of blockKeys. - assert pos < blockKeys.length; - return pos; - } - - // Otherwise, pos = -(i + 1), where blockKeys[i - 1] < key < blockKeys[i], - // and i is in [0, blockKeys.length]. We are returning j = i - 1 such that - // blockKeys[j] <= key < blockKeys[j + 1]. In particular, j = -1 if - // key < blockKeys[0], meaning the file does not contain the given key. - - int i = -pos - 1; - assert 0 <= i && i <= blockKeys.length; - return i - 1; - } - - /** - * Adds a new entry in the root block index. Only used when reading. - * - * @param key Last key in the block - * @param offset file offset where the block is stored - * @param dataSize the uncompressed data size - */ - private void add(final byte[] key, final long offset, final int dataSize) { - blockOffsets[rootCount] = offset; - blockKeys[rootCount] = key; - blockDataSizes[rootCount] = dataSize; - rootCount++; - } + public abstract int rootBlockContainingKey(final Cell key); /** * The indexed key at the ith position in the nonRootIndex. The position starts at 0. @@ -489,7 +709,7 @@ public class HFileBlockIndex { * @param i the ith position * @return The indexed key at the ith position in the nonRootIndex. */ - private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) { + protected byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) { int numEntries = nonRootIndex.getInt(0); if (i < 0 || i >= numEntries) { return null; @@ -653,22 +873,8 @@ public class HFileBlockIndex { * @param numEntries the number of root-level index entries * @throws IOException */ - public void readRootIndex(DataInput in, final int numEntries) - throws IOException { - blockOffsets = new long[numEntries]; - blockKeys = new byte[numEntries][]; - blockDataSizes = new int[numEntries]; - - // If index size is zero, no index was written. - if (numEntries > 0) { - for (int i = 0; i < numEntries; ++i) { - long offset = in.readLong(); - int dataSize = in.readInt(); - byte[] key = Bytes.readByteArray(in); - add(key, offset, dataSize); - } - } - } + public abstract void readRootIndex(DataInput in, final int numEntries) + throws IOException; /** * Read in the root-level index from the given input stream. 
Must match @@ -712,36 +918,15 @@ public class HFileBlockIndex { } @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("size=" + rootCount).append("\n"); - for (int i = 0; i < rootCount; i++) { - sb.append("key=").append(KeyValue.keyToString(blockKeys[i])) - .append("\n offset=").append(blockOffsets[i]) - .append(", dataSize=" + blockDataSizes[i]).append("\n"); - } - return sb.toString(); - } - - @Override public long heapSize() { - long heapSize = ClassSize.align(6 * ClassSize.REFERENCE + + // The BlockIndexReader does not have the blockKey + long heapSize = ClassSize.align(5 * ClassSize.REFERENCE + 2 * Bytes.SIZEOF_INT + ClassSize.OBJECT); // Mid-key metadata. heapSize += MID_KEY_METADATA_SIZE; - // Calculating the size of blockKeys - if (blockKeys != null) { - // Adding array + references overhead - heapSize += ClassSize.align(ClassSize.ARRAY + blockKeys.length - * ClassSize.REFERENCE); - - // Adding bytes - for (byte[] key : blockKeys) { - heapSize += ClassSize.align(ClassSize.ARRAY + key.length); - } - } + heapSize = calculateHeapSizeForBlockKeys(heapSize); if (blockOffsets != null) { heapSize += ClassSize.align(ClassSize.ARRAY + blockOffsets.length @@ -756,6 +941,7 @@ public class HFileBlockIndex { return ClassSize.align(heapSize); } + protected abstract long calculateHeapSizeForBlockKeys(long heapSize); } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java index 7cc31d0..c8c8e69 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFilePrettyPrinter.java @@ -410,7 +410,7 @@ public class HFilePrettyPrinter extends Configured implements Tool { } try { - System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey())); + System.out.println("Mid-key: " + (reader.midkey())); } catch (Exception e) { System.out.println ("Unable to retrieve the midkey"); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java index 4d1881d..4ce5a4c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderImpl.java @@ -54,6 +54,7 @@ import org.apache.hadoop.hbase.security.User; import org.apache.hadoop.hbase.util.ByteBufferUtils; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.IdLock; +import org.apache.hadoop.hbase.util.SimpleMutableByteRange; import org.apache.hadoop.io.WritableUtils; import org.apache.htrace.Trace; import org.apache.htrace.TraceScope; @@ -189,9 +190,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { // Comparator class name is stored in the trailer in version 2. comparator = trailer.createComparator(); - dataBlockIndexReader = new HFileBlockIndex.BlockIndexReader(comparator, + dataBlockIndexReader = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, trailer.getNumDataIndexLevels(), this); - metaBlockIndexReader = new HFileBlockIndex.BlockIndexReader( + metaBlockIndexReader = new HFileBlockIndex.ByteArrayKeyBlockIndexReader( null, 1); // Parse load-on-open data. 
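In the heapSize() hunk above, the base class drops from six counted references to five because blockKeys now lives in the subclasses; the base tallies its own fields and hands the running total to calculateHeapSizeForBlockKeys(), the one piece that depends on the key representation. The shape of that split (the constants are illustrative stand-ins, not real HBase sizes):

```java
// Base class: fixed-cost fields; subclass hook: representation-specific keys.
abstract class SizedIndexReader {
  public long heapSize() {
    long size = 5 * 8 + 2 * 4 + 16;            // refs + ints + object header (illustrative)
    size = calculateHeapSizeForBlockKeys(size); // byte[][] vs Cell[] accounting
    return align(size);
  }

  protected abstract long calculateHeapSizeForBlockKeys(long runningTotal);

  static long align(long n) {
    return (n + 7) & ~7L; // round up to an 8-byte boundary, like ClassSize.align
  }
}
```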
@@ -309,7 +310,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { } private String toStringFirstKey() { - return KeyValue.keyToString(getFirstKey()); + return getFirstKey().toString(); } private String toStringLastKey() { @@ -341,12 +342,12 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { * first KeyValue. */ @Override - public byte [] getFirstKey() { + public Cell getFirstKey() { if (dataBlockIndexReader == null) { throw new BlockIndexNotLoadedException(); } return dataBlockIndexReader.isEmpty() ? null - : dataBlockIndexReader.getRootBlockKey(0); + : ((HFileBlockIndex.CellBasedKeyBlockIndexReader) dataBlockIndexReader).getRootBlockKey(0); } /** @@ -357,8 +358,9 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { */ @Override public byte[] getFirstRowKey() { - byte[] firstKey = getFirstKey(); - return firstKey == null? null: KeyValueUtil.createKeyValueFromKey(firstKey).getRow(); + Cell firstKey = getFirstKey(); + // We have to copy the row part to form the row key alone + return Bytes.copy(firstKey.getRowArray(), firstKey.getRowOffset(), firstKey.getRowLength()); } /** @@ -1210,7 +1212,8 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { // Per meta key from any given file, synchronize reads for said block. This // is OK to do for meta blocks because the meta block index is always // single-level. - synchronized (metaBlockIndexReader.getRootBlockKey(block)) { + synchronized (((HFileBlockIndex.ByteArrayKeyBlockIndexReader) metaBlockIndexReader) + .getRootBlockKey(block)) { // Check cache for block. If found return. long metaBlockOffset = metaBlockIndexReader.getRootBlockOffset(block); BlockCacheKey cacheKey = new BlockCacheKey(name, metaBlockOffset); @@ -1382,7 +1385,7 @@ public class HFileReaderImpl implements HFile.Reader, Configurable { * @throws IOException */ @Override - public byte[] midkey() throws IOException { + public Cell midkey() throws IOException { return dataBlockIndexReader.midkey(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java index ee2644d..f4eaaf9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java @@ -39,6 +39,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; @@ -597,12 +598,12 @@ public class HRegionFileSystem { } else { //check if smaller than first key KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow); - byte[] firstKey = f.createReader().getFirstKey(); + Cell firstKey = f.createReader().getFirstKey(); // If firstKey is null means storefile is empty. 
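In the splitStoreFile path, the bottom-half check just below collapses to a direct Cell-to-Cell comparison, replacing the old (Cell, byte[], offset, length) form: if the split key sorts below the file's first key, the bottom half would be empty and no reference file is needed. Restated as a helper (bottomReferenceHasData is a hypothetical name):

```java
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;

public class SplitReferenceGuard {
  // True when a bottom half-file reference would actually contain data, i.e.
  // the split point does not sort below the file's first key.
  static boolean bottomReferenceHasData(byte[] splitRow, Cell firstKey, CellComparator comp) {
    if (firstKey == null) {
      return false; // empty store file
    }
    KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
    return comp.compare(splitKey, firstKey) >= 0;
  }
}
```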
if (firstKey == null) { return null; } - if (f.getReader().getComparator().compare(splitKey, firstKey, 0, firstKey.length) < 0) { + if (f.getReader().getComparator().compare(splitKey, firstKey) < 0) { return null; } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java index 5d7248d..105ef21 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HStore.java @@ -1849,9 +1849,9 @@ public class HStore implements Store { return false; } // TODO: Cache these keys rather than make each time? - byte [] fk = r.getFirstKey(); + Cell fk = r.getFirstKey(); if (fk == null) return false; - KeyValue firstKV = KeyValueUtil.createKeyValueFromKey(fk, 0, fk.length); + KeyValue firstKV = (KeyValue) fk; byte [] lk = r.getLastKey(); KeyValue lastKV = KeyValueUtil.createKeyValueFromKey(lk, 0, lk.length); KeyValue firstOnRow = state.getTargetKey(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java index eba3689..c62a19c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java @@ -678,21 +678,20 @@ public class StoreFile { // Get first, last, and mid keys. Midkey is the key that starts block // in middle of hfile. Has column and timestamp. Need to return just // the row we want to split on as midkey. - byte [] midkey = this.reader.midkey(); + Cell midkey = this.reader.midkey(); if (midkey != null) { - KeyValue mk = KeyValueUtil.createKeyValueFromKey(midkey, 0, midkey.length); - byte [] fk = this.reader.getFirstKey(); - KeyValue firstKey = KeyValueUtil.createKeyValueFromKey(fk, 0, fk.length); + Cell firstKey = this.reader.getFirstKey(); byte [] lk = this.reader.getLastKey(); KeyValue lastKey = KeyValueUtil.createKeyValueFromKey(lk, 0, lk.length); // if the midkey is the same as the first or last keys, we cannot (ever) split this region. - if (comparator.compareRows(mk, firstKey) == 0 || comparator.compareRows(mk, lastKey) == 0) { + if (comparator.compareRows(midkey, firstKey) == 0 + || comparator.compareRows(midkey, lastKey) == 0) { if (LOG.isDebugEnabled()) { LOG.debug("cannot split because midkey is the same as first or last row"); } return null; } - return mk.getRow(); + return Bytes.copy(midkey.getRowArray(), midkey.getRowOffset(), midkey.getRowLength()); } return null; } @@ -1371,8 +1370,7 @@ public class StoreFile { .createLastOnRow(scan.getStartRow()) : KeyValueUtil.createLastOnRow(scan .getStopRow()); // TODO this is in hot path? Optimize and avoid 2 extra object creations. 
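The getSplitPoint() hunk above now works on the midkey Cell directly: compare its row against the first and last rows, and only copy the row bytes out when a split is actually legal. Restated as a small helper (chooseSplitRow is illustrative, not the actual StoreFile method):

```java
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.util.Bytes;

public class SplitPointRule {
  // Returns the split row, or null when the file cannot be split.
  static byte[] chooseSplitRow(Cell first, Cell mid, Cell last, CellComparator comp) {
    if (mid == null) {
      return null; // no midkey: file is not splittable
    }
    if (comp.compareRows(mid, first) == 0 || comp.compareRows(mid, last) == 0) {
      return null; // midkey shares a row with an endpoint: a daughter would be empty
    }
    return Bytes.copy(mid.getRowArray(), mid.getRowOffset(), mid.getRowLength());
  }
}
```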
- KeyValue.KeyOnlyKeyValue firstKeyKV = - new KeyValue.KeyOnlyKeyValue(this.getFirstKey(), 0, this.getFirstKey().length); + Cell firstKeyKV = this.getFirstKey(); KeyValue.KeyOnlyKeyValue lastKeyKV = new KeyValue.KeyOnlyKeyValue(this.getLastKey(), 0, this.getLastKey().length); boolean nonOverLapping = ((getComparator().compare(firstKeyKV, largestScanKeyValue)) > 0 @@ -1493,7 +1491,7 @@ public class StoreFile { return reader.getLastRowKey(); } - public byte[] midkey() throws IOException { + public Cell midkey() throws IOException { return reader.midkey(); } @@ -1513,7 +1511,7 @@ public class StoreFile { return deleteFamilyCnt; } - public byte[] getFirstKey() { + public Cell getFirstKey() { return reader.getFirstKey(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java index 984742f..e72ea6a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CompoundBloomFilter.java @@ -84,7 +84,11 @@ public class CompoundBloomFilter extends CompoundBloomFilterBase throw new IllegalArgumentException("Invalid hash type: " + hashType); } // We will pass null for ROW block - index = new HFileBlockIndex.BlockIndexReader(comparator, 1); + if(comparator == null) { + index = new HFileBlockIndex.ByteArrayKeyBlockIndexReader(null, 1); + } else { + index = new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1); + } index.readRootIndex(meta, numChunks); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 0682786..3e164ba 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -776,8 +776,8 @@ public class HBaseFsck extends Configured implements Closeable { getConf()), getConf()); if ((reader.getFirstKey() != null) && ((storeFirstKey == null) || (comparator.compare(storeFirstKey, - reader.getFirstKey()) > 0))) { - storeFirstKey = reader.getFirstKey(); + ((KeyValue.KeyOnlyKeyValue) reader.getFirstKey()).getKey()) > 0))) { + storeFirstKey = ((KeyValue.KeyOnlyKeyValue)reader.getFirstKey()).getKey(); } if ((reader.getLastKey() != null) && ((storeLastKey == null) || (comparator.compare(storeLastKey, @@ -790,7 +790,7 @@ public class HBaseFsck extends Configured implements Closeable { } currentRegionBoundariesInformation.metaFirstKey = regionInfo.getStartKey(); currentRegionBoundariesInformation.metaLastKey = regionInfo.getEndKey(); - currentRegionBoundariesInformation.storesFirstKey = keyOnly(storeFirstKey); + currentRegionBoundariesInformation.storesFirstKey = storeFirstKey; currentRegionBoundariesInformation.storesLastKey = keyOnly(storeLastKey); if (currentRegionBoundariesInformation.metaFirstKey.length == 0) currentRegionBoundariesInformation.metaFirstKey = null; @@ -879,7 +879,7 @@ public class HBaseFsck extends Configured implements Closeable { CacheConfig cacheConf = new CacheConfig(getConf()); hf = HFile.createReader(fs, hfile.getPath(), cacheConf, getConf()); hf.loadFileInfo(); - KeyValue startKv = KeyValueUtil.createKeyValueFromKey(hf.getFirstKey()); + Cell startKv = hf.getFirstKey(); start = startKv.getRow(); KeyValue endKv = KeyValueUtil.createKeyValueFromKey(hf.getLastKey()); end = endKv.getRow(); diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java index 7e2f1c0..9b99502 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/TestHalfStoreFileReader.java @@ -101,9 +101,8 @@ public class TestHalfStoreFileReader { HFile.Reader r = HFile.createReader(fs, p, cacheConf, conf); r.loadFileInfo(); - byte [] midkey = r.midkey(); - KeyValue midKV = KeyValueUtil.createKeyValueFromKey(midkey); - midkey = midKV.getRow(); + Cell midKV = r.midkey(); + byte[] midkey = ((KeyValue.KeyOnlyKeyValue)midKV).getRow(); //System.out.println("midkey: " + midKV + " or: " + Bytes.toStringBinary(midkey)); @@ -167,9 +166,8 @@ public class TestHalfStoreFileReader { HFile.Reader r = HFile.createReader(fs, p, cacheConf, conf); r.loadFileInfo(); - byte[] midkey = r.midkey(); - KeyValue midKV = KeyValueUtil.createKeyValueFromKey(midkey); - midkey = midKV.getRow(); + Cell midKV = r.midkey(); + byte[] midkey = ((KeyValue.KeyOnlyKeyValue)midKV).getRow(); Reference bottom = new Reference(midkey, Reference.Range.bottom); Reference top = new Reference(midkey, Reference.Range.top); @@ -217,7 +215,7 @@ public class TestHalfStoreFileReader { assertNull(foundKeyValue); } - private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, KeyValue seekBefore, + private Cell doTestOfSeekBefore(Path p, FileSystem fs, Reference bottom, Cell seekBefore, CacheConfig cacheConfig) throws IOException { final HalfStoreFileReader halfreader = new HalfStoreFileReader(fs, p, diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java index 8891a6a..0003d00 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java @@ -205,7 +205,7 @@ public class TestHFileBlockIndex { BlockReaderWrapper brw = new BlockReaderWrapper(blockReader); HFileBlockIndex.BlockIndexReader indexReader = - new HFileBlockIndex.BlockIndexReader( + new HFileBlockIndex.CellBasedKeyBlockIndexReader( CellComparator.COMPARATOR, numLevels, brw); indexReader.readRootIndex(blockReader.blockRange(rootIndexOffset, @@ -493,16 +493,17 @@ public class TestHFileBlockIndex { long expected = ClassSize.estimateBase(cl, false); HFileBlockIndex.BlockIndexReader bi = - new HFileBlockIndex.BlockIndexReader(null, 1); + new HFileBlockIndex.ByteArrayKeyBlockIndexReader(null, 1); long actual = bi.heapSize(); // Since the arrays in BlockIndex(byte [][] blockKeys, long [] blockOffsets, // int [] blockDataSizes) are all null they are not going to show up in the // HeapSize calculation, so need to remove those array costs from expected. 
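Back in the CompoundBloomFilter hunk above, the reader flavor is chosen by whether a comparator was supplied: ROW blooms key their chunks by raw row bytes, ROW_COL blooms need full cell-key comparison. A sketch of that dispatch as a standalone factory (forBloom is an illustrative name):

```java
import java.io.DataInput;
import java.io.IOException;

import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.io.hfile.HFileBlockIndex;

public class BloomIndexSelection {
  // Bloom chunk indexes are always single-level, hence treeLevel = 1.
  static HFileBlockIndex.BlockIndexReader forBloom(CellComparator comparator,
      DataInput meta, int numChunks) throws IOException {
    HFileBlockIndex.BlockIndexReader index = (comparator == null)
        ? new HFileBlockIndex.ByteArrayKeyBlockIndexReader(null, 1)     // ROW bloom
        : new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, 1); // ROW_COL bloom
    index.readRootIndex(meta, numChunks);
    return index;
  }
}
```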
- expected -= ClassSize.align(3 * ClassSize.ARRAY); + // Already the block keys are not there in this case + expected -= ClassSize.align(2 * ClassSize.ARRAY); if (expected != actual) { - ClassSize.estimateBase(cl, true); + expected = ClassSize.estimateBase(cl, true); assertEquals(expected, actual); } } @@ -574,7 +575,7 @@ public class TestHFileBlockIndex { assertEquals(expectedNumLevels, reader.getTrailer().getNumDataIndexLevels()); - assertTrue(Bytes.equals(keys[0], reader.getFirstKey())); + assertTrue(Bytes.equals(keys[0], ((KeyValue)reader.getFirstKey()).getKey())); assertTrue(Bytes.equals(keys[NUM_KV - 1], reader.getLastKey())); LOG.info("Last key: " + Bytes.toStringBinary(keys[NUM_KV - 1])); @@ -631,7 +632,7 @@ public class TestHFileBlockIndex { // Validate the mid-key. assertEquals( Bytes.toStringBinary(blockKeys.get((blockKeys.size() - 1) / 2)), - Bytes.toStringBinary(reader.midkey())); + reader.midkey()); assertEquals(UNCOMPRESSED_INDEX_SIZES[testI], reader.getTrailer().getUncompressedDataIndexSize()); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java index b19ec0d..d3153fc 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileSeek.java @@ -187,7 +187,7 @@ public class TestHFileSeek extends TestCase { fs.getFileStatus(path).getLen(), new CacheConfig(conf), conf); reader.loadFileInfo(); KeySampler kSampler = - new KeySampler(rng, reader.getFirstKey(), reader.getLastKey(), + new KeySampler(rng, ((KeyValue)reader.getFirstKey()).getKey(), reader.getLastKey(), keyLenGen); HFileScanner scanner = reader.getScanner(false, USE_PREAD); BytesWritable key = new BytesWritable(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java index 232d1f7..f48dc9f 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV2.java @@ -38,6 +38,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.KeyValue; @@ -148,10 +149,10 @@ public class TestHFileWriterV2 { // Comparator class name is stored in the trailer in version 2. 
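Several call sites in the tests above, and in HBaseFsck earlier, still need the flat serialized key, so they downcast the returned Cell. That is safe only because these readers hand back KeyValue or KeyOnlyKeyValue instances. The bridging pattern in one place (flatKey is a hypothetical helper):

```java
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.KeyValue;

public class FlatKeyBridge {
  // KeyValue.KeyOnlyKeyValue extends KeyValue, so one cast covers both the
  // ((KeyValue) ...) and ((KeyValue.KeyOnlyKeyValue) ...) forms in this patch.
  static byte[] flatKey(Cell c) {
    return ((KeyValue) c).getKey(); // throws ClassCastException for other Cell impls
  }
}
```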
CellComparator comparator = trailer.createComparator(); HFileBlockIndex.BlockIndexReader dataBlockIndexReader = - new HFileBlockIndex.BlockIndexReader(comparator, + new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, trailer.getNumDataIndexLevels()); HFileBlockIndex.BlockIndexReader metaBlockIndexReader = - new HFileBlockIndex.BlockIndexReader( + new HFileBlockIndex.ByteArrayKeyBlockIndexReader( null, 1); HFileBlock.BlockIterator blockIter = blockReader.blockRange( @@ -164,7 +165,7 @@ public class TestHFileWriterV2 { trailer.getDataIndexCount()); if (findMidKey) { - byte[] midkey = dataBlockIndexReader.midkey(); + Cell midkey = dataBlockIndexReader.midkey(); assertNotNull("Midkey should not be null", midkey); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java index 37f83b1..336e850 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileWriterV3.java @@ -38,6 +38,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; @@ -179,10 +180,10 @@ public class TestHFileWriterV3 { // Comparator class name is stored in the trailer in version 2. CellComparator comparator = trailer.createComparator(); HFileBlockIndex.BlockIndexReader dataBlockIndexReader = - new HFileBlockIndex.BlockIndexReader(comparator, + new HFileBlockIndex.CellBasedKeyBlockIndexReader(comparator, trailer.getNumDataIndexLevels()); HFileBlockIndex.BlockIndexReader metaBlockIndexReader = - new HFileBlockIndex.BlockIndexReader( + new HFileBlockIndex.ByteArrayKeyBlockIndexReader( null, 1); HFileBlock.BlockIterator blockIter = blockReader.blockRange( @@ -194,7 +195,7 @@ public class TestHFileWriterV3 { blockIter.nextBlockWithBlockType(BlockType.ROOT_INDEX), trailer.getDataIndexCount()); if (findMidKey) { - byte[] midkey = dataBlockIndexReader.midkey(); + Cell midkey = dataBlockIndexReader.midkey(); assertNotNull("Midkey should not be null", midkey); } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java index 54f79f4..499e57c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFile.java @@ -170,7 +170,7 @@ public class TestStoreFile extends HBaseTestCase { // Split on a row, not in middle of row. Midkey returned by reader // may be in middle of row. Create new one with empty column and // timestamp. 
- KeyValue kv = KeyValueUtil.createKeyValueFromKey(reader.midkey()); + Cell kv = reader.midkey(); byte [] midRow = kv.getRow(); kv = KeyValueUtil.createKeyValueFromKey(reader.getLastKey()); byte [] finalRow = kv.getRow(); @@ -314,8 +314,8 @@ public class TestStoreFile extends HBaseTestCase { private void checkHalfHFile(final HRegionFileSystem regionFs, final StoreFile f) throws IOException { - byte [] midkey = f.createReader().midkey(); - KeyValue midKV = KeyValueUtil.createKeyValueFromKey(midkey); + Cell midkey = f.createReader().midkey(); + KeyValue midKV = (KeyValue)midkey; byte [] midRow = midKV.getRow(); // Create top split. HRegionInfo topHri = new HRegionInfo(regionFs.getRegionInfo().getTable(), @@ -332,7 +332,7 @@ public class TestStoreFile extends HBaseTestCase { this.fs, bottomPath, conf, cacheConf, BloomType.NONE).createReader(); ByteBuffer previous = null; LOG.info("Midkey: " + midKV.toString()); - ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midkey); + ByteBuffer bbMidkeyBytes = ByteBuffer.wrap(midKV.getKey()); try { // Now make two HalfMapFiles and assert they can read the full backing // file, one from the top and the other from the bottom. @@ -348,7 +348,7 @@ public class TestStoreFile extends HBaseTestCase { if ((topScanner.getReader().getComparator().compare(midKV, key.array(), key.arrayOffset(), key.limit())) > 0) { fail("key=" + Bytes.toStringBinary(key) + " < midkey=" + - Bytes.toStringBinary(midkey)); + midkey); } if (first) { first = false;