diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
index 3e26107..4741e78 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlock.java
@@ -187,6 +187,7 @@ public class HFileBlock implements Cacheable {
    */
   private int nextBlockOnDiskSizeWithHeader = -1;
 
+  private volatile int[] index;
   /**
    * Creates a new {@link HFile} block from the given fields. This constructor
    * is mostly used when the block data has already been read and uncompressed,
@@ -1740,6 +1741,12 @@ public class HFileBlock implements Cacheable {
     return this.fileContext;
   }
 
+  public void setIndex(int[] index) {
+    this.index = index;
+  }
+  public int[] getIndex() {
+    return this.index;
+  }
   /**
    * Convert the contents of the block header into a human readable string.
    * This is mostly helpful for debugging. This assumes that the block
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
index 1292319..8a2151d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
@@ -21,8 +21,11 @@ import java.io.DataInput;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
 import java.util.List;
 
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -741,7 +744,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
    * Implementation of {@link HFileScanner} interface.
    */
   protected static class ScannerV2 extends AbstractScannerV2 {
-    private HFileReaderV2 reader;
+    protected HFileReaderV2 reader;
 
     public ScannerV2(HFileReaderV2 r, boolean cacheBlocks, final boolean pread,
         final boolean isCompaction) {
@@ -919,6 +922,7 @@ public class HFileReaderV2 extends AbstractHFileReader {
       }
 
       blockBuffer = block.getBufferWithoutHeader();
+      updateIndex();
       readKeyValueLen();
       blockFetches++;
 
@@ -926,6 +930,46 @@ public class HFileReaderV2 extends AbstractHFileReader {
       this.nextIndexedKey = null;
     }
 
+    protected void updateIndex() {
+      if (block.getIndex() == null) {
+        // generate a block index --- TODO: consider building this at block load time instead to avoid the synchronization
+        synchronized (block) {
+          if (block.getIndex() == null) {
+            int oldPosition = blockBuffer.position();
+            List<Integer> indexes = new ArrayList<Integer>();
+            while (blockBuffer.remaining() > 0) {
+              indexes.add(blockBuffer.position());
+              readKeyValueLen();
+              blockBuffer.position(blockBuffer.position() + currKeyLen + currValueLen + currMemstoreTSLen + KEY_VALUE_LEN_SIZE);
+            }
+            block.setIndex(ArrayUtils.toPrimitive(indexes.toArray(new Integer[indexes.size()])));
+            blockBuffer.position(oldPosition);
+            // ---
+          }
+        }
+      }
+    }
+
+    protected int binarySearch(Cell key) {
+      KeyValue.KeyOnlyKeyValue keyOnlykv = new KeyValue.KeyOnlyKeyValue();
+
+      int[] a = block.getIndex();
+      int low = 0;
+      int high = a.length - 1;
+
+      while (low <= high) {
+        int mid = (low + high) >>> 1;
+        int kvOffset = blockBuffer.arrayOffset() + a[mid];
+        int klen = Bytes.toInt(blockBuffer.array(), kvOffset);
+        keyOnlykv.setKey(blockBuffer.array(), kvOffset + KEY_VALUE_LEN_SIZE, klen);
+        int cmp = reader.getComparator().compareOnlyKeyPortion(key, keyOnlykv);
+        if (cmp > 0) low = mid + 1;
+        else if (cmp < 0) high = mid - 1;
+        else return mid; // found
+      }
+      return -(low + 1); // not found: -(insertion point) - 1
+    }
+
     protected void readKeyValueLen() {
       blockBuffer.mark();
       currKeyLen = blockBuffer.getInt();
@@ -978,6 +1022,43 @@ public class HFileReaderV2 extends AbstractHFileReader {
      * key)
      */
     protected int blockSeek(Cell key, boolean seekBefore) {
+      int pos = binarySearch(key);
+      if (pos >= 0) {
+        if (seekBefore) {
+          if (pos == 0) {
+            KeyValue kv = KeyValueUtil.ensureKeyValue(key);
+            throw new IllegalStateException("blockSeek with seekBefore "
+                + "at the first key of the block: key="
+                + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
+                + ", blockOffset=" + block.getOffset() + ", onDiskSize="
+                + block.getOnDiskSizeWithHeader());
+          }
+          blockBuffer.position(block.getIndex()[pos - 1]);
+          //blockBuffer.position(pos);
+          readKeyValueLen();
+          return 1; // non exact match
+        } else {
+          blockBuffer.position(block.getIndex()[pos]);
+          readKeyValueLen();
+          return 0; // indicate exact match
+        }
+      } else {
+        // key does not exist, place the scanner right before it
+        if (-pos - 1 == 0) {
+          blockBuffer.position(block.getIndex()[-pos - 1]);
+        } else {
+          blockBuffer.position(block.getIndex()[-pos - 1 - 1]);
+        }
+        readKeyValueLen();
+        if (-pos - 1 == 0
+            && this.reader.trailer.getMinorVersion() >= MINOR_VERSION_WITH_FAKED_KEY) {
+          return HConstants.INDEX_KEY_MAGIC;
+        }
+        return 1; // non exact match
+      }
+    }
+
+    protected int blockSeekOld(Cell key, boolean seekBefore) {
       int klen, vlen;
       long memstoreTS = 0;
       int memstoreTSLen = 0;
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
index 5460a14..ab35d6d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV3.java
@@ -20,7 +20,10 @@ package org.apache.hadoop.hbase.io.hfile;
 import java.io.IOException;
 import java.security.Key;
 import java.security.KeyException;
+import java.util.ArrayList;
+import java.util.List;
 
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -246,6 +249,42 @@ public class HFileReaderV3 extends HFileReaderV2 {
      */
     @Override
     protected int blockSeek(Cell key, boolean seekBefore) {
+      int pos = binarySearch(key);
+      if (pos >= 0) {
+        if (seekBefore) {
+          if (pos == 0) {
+            KeyValue kv = KeyValueUtil.ensureKeyValue(key);
+            throw new IllegalStateException("blockSeek with seekBefore "
+                + "at the first key of the block: key="
+                + Bytes.toStringBinary(kv.getKey(), kv.getKeyOffset(), kv.getKeyLength())
+                + ", blockOffset=" + block.getOffset() + ", onDiskSize="
+                + block.getOnDiskSizeWithHeader());
+          }
+          blockBuffer.position(block.getIndex()[pos - 1]);
+          //blockBuffer.position(pos);
+          readKeyValueLen();
+          return 1; // non exact match
+        } else {
+          blockBuffer.position(block.getIndex()[pos]);
+          readKeyValueLen();
+          return 0; // indicate exact match
+        }
+      } else {
+        // key does not exist, place the scanner right before it
+        if (-pos - 1 == 0) {
+          blockBuffer.position(block.getIndex()[-pos - 1]);
+        } else {
+          blockBuffer.position(block.getIndex()[-pos - 1 - 1]);
+        }
+        readKeyValueLen();
+        if (-pos - 1 == 0) {
+          return HConstants.INDEX_KEY_MAGIC;
+        }
+        return 1; // non exact match
+      }
+    }
+
+    protected int blockSeekOld(Cell key, boolean seekBefore) {
       int klen, vlen, tlen = 0;
       long memstoreTS = 0;
       int memstoreTSLen = 0;
@@ -340,8 +379,32 @@ public class HFileReaderV3 extends HFileReaderV2 {
       readKeyValueLen();
       return 1; // didn't exactly find it.
     }
+
+    protected void updateIndex() {
+      if (block.getIndex() == null) {
+        // generate a block index --- TODO: consider building this at block load time instead to avoid the synchronization
+        synchronized (block) {
+          if (block.getIndex() == null) {
+            int oldPosition = blockBuffer.position();
+            List<Integer> indexes = new ArrayList<Integer>();
+            while (blockBuffer.remaining() > 0) {
+              indexes.add(blockBuffer.position());
+              readKeyValueLen();
+              blockBuffer.position(blockBuffer.position() + currKeyLen + currValueLen
+                  + currMemstoreTSLen
+                  + (reader.hfileContext.isIncludesTags() ? currTagsLen + Bytes.SIZEOF_SHORT : 0)
+                  + KEY_VALUE_LEN_SIZE);
+            }
+            block.setIndex(ArrayUtils.toPrimitive(indexes.toArray(new Integer[indexes.size()])));
+            blockBuffer.position(oldPosition);
+            // ---
+          }
+        }
+      }
+    }
   }
 
+
   /**
    * ScannerV3 that operates on encoded data blocks.
    */
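
Illustrative sketch (not part of the patch): the patch lazily records the buffer offset of every KeyValue in a data block (updateIndex) and then binary searches those offsets instead of scanning the block linearly (binarySearch/blockSeek). The standalone Java below shows the same idea under simplifying assumptions: the class and method names (BlockIndexSketch, buildIndex, search, compare) are invented for this example, and the record layout is reduced to [int keyLen][int valLen][key bytes][value bytes] with no memstore timestamp or tags, so it is a sketch of the approach rather than HBase code.

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

public class BlockIndexSketch {

  // Scan the buffer once and remember where each record starts,
  // analogous to what updateIndex() does with blockBuffer.
  static int[] buildIndex(ByteBuffer block) {
    List<Integer> offsets = new ArrayList<Integer>();
    ByteBuffer b = block.duplicate(); // leave the caller's position untouched
    while (b.remaining() > 0) {
      offsets.add(b.position());
      int keyLen = b.getInt();
      int valLen = b.getInt();
      b.position(b.position() + keyLen + valLen); // jump to the next record
    }
    int[] index = new int[offsets.size()];
    for (int i = 0; i < index.length; i++) {
      index[i] = offsets.get(i);
    }
    return index;
  }

  // Binary search over the record offsets. Returns the position of an exact
  // match, or -(insertionPoint + 1) when the key is absent -- the same
  // convention as java.util.Arrays.binarySearch and the patch's binarySearch().
  static int search(ByteBuffer block, int[] index, byte[] key) {
    int low = 0;
    int high = index.length - 1;
    while (low <= high) {
      int mid = (low + high) >>> 1;
      int keyLen = block.getInt(index[mid]);
      int keyOffset = index[mid] + 8; // skip the two length ints
      int cmp = compare(key, block, keyOffset, keyLen);
      if (cmp > 0) {
        low = mid + 1;
      } else if (cmp < 0) {
        high = mid - 1;
      } else {
        return mid; // exact match
      }
    }
    return -(low + 1); // not found
  }

  // Unsigned lexicographic comparison of key against keyLen bytes at offset.
  static int compare(byte[] key, ByteBuffer block, int offset, int keyLen) {
    int n = Math.min(key.length, keyLen);
    for (int i = 0; i < n; i++) {
      int d = (key[i] & 0xff) - (block.get(offset + i) & 0xff);
      if (d != 0) {
        return d;
      }
    }
    return key.length - keyLen;
  }
}

A caller interprets the result the same way the new blockSeek() does: a non-negative pos is an exact match at index[pos] (index[pos - 1] when seekBefore is requested), while a negative pos encodes the insertion point as -pos - 1, so the scanner is placed at index[-pos - 2], or at index[0] when the key sorts before every key in the block.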