Index: hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java (revision 1343115)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java (working copy)
@@ -60,7 +60,7 @@
protected static final byte [][] COLUMNS = {fam1, fam2, fam3};
private boolean localfs = false;
- protected Path testDir = null;
+ protected static Path testDir = null;
protected FileSystem fs = null;
protected HRegion root = null;
protected HRegion meta = null;
@@ -161,9 +161,15 @@
* @return An {@link HRegion}
* @throws IOException
*/
- protected HRegion createNewHRegion(HTableDescriptor desc, byte [] startKey,
+ public HRegion createNewHRegion(HTableDescriptor desc, byte [] startKey,
byte [] endKey)
throws IOException {
+ return createNewHRegion(desc, startKey, endKey, this.conf);
+ }
+
+ public HRegion createNewHRegion(HTableDescriptor desc, byte [] startKey,
+ byte [] endKey, Configuration conf)
+ throws IOException {
HRegionInfo hri = new HRegionInfo(desc.getName(), startKey, endKey);
return HRegion.createHRegion(hri, testDir, conf, desc);
}
@@ -226,10 +232,11 @@
* Adds data of the form 'aaa', 'aab', etc. where key and value are the same.
* @param r
* @param columnFamily
+ * @param column
* @throws IOException
* @return count of what we added.
*/
- protected static long addContent(final HRegion r, final byte [] columnFamily)
+ public static long addContent(final HRegion r, final byte [] columnFamily, final byte[] column)
throws IOException {
byte [] startKey = r.getRegionInfo().getStartKey();
byte [] endKey = r.getRegionInfo().getEndKey();
@@ -237,7 +244,7 @@
if (startKeyBytes == null || startKeyBytes.length == 0) {
startKeyBytes = START_KEY_BYTES;
}
- return addContent(new HRegionIncommon(r), Bytes.toString(columnFamily), null,
+ return addContent(new HRegionIncommon(r), Bytes.toString(columnFamily), Bytes.toString(column),
startKeyBytes, endKey, -1);
}
@@ -245,18 +252,32 @@
* Add content to region <code>r</code> on the passed column
* <code>column</code>.
* Adds data of the form 'aaa', 'aab', etc. where key and value are the same.
+ * @param r
+ * @param columnFamily
+ * @throws IOException
+ * @return count of what we added.
+ */
+ protected static long addContent(final HRegion r, final byte [] columnFamily)
+ throws IOException {
+ return addContent(r, columnFamily, null);
+ }
+
+ /**
+ * Add content to region <code>r</code> on the passed column
+ * family.
+ * Adds data of the form 'aaa', 'aab', etc. where key and value are the same.
* @param updater An instance of {@link Incommon}.
* @param columnFamily
* @throws IOException
* @return count of what we added.
*/
protected static long addContent(final Incommon updater,
- final String columnFamily) throws IOException {
+ final String columnFamily) throws IOException {
return addContent(updater, columnFamily, START_KEY_BYTES, null);
}
protected static long addContent(final Incommon updater, final String family,
- final String column) throws IOException {
+ final String column) throws IOException {
return addContent(updater, family, column, START_KEY_BYTES, null);
}
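A minimal sketch of how a test can use the new public overloads; it assumes it runs inside an HBaseTestCase subclass, and the table, family, and qualifier names are illustrative, not from the patch:

    // Hedged sketch: exercise the new public overloads.
    HTableDescriptor desc = new HTableDescriptor(Bytes.toBytes("demo"));
    desc.addFamily(new HColumnDescriptor(Bytes.toBytes("f")));
    // The new 4-arg overload lets a test supply its own Configuration
    // instead of the inherited this.conf.
    HRegion region = createNewHRegion(desc, Bytes.toBytes("aaa"),
        Bytes.toBytes("zzz"), HBaseConfiguration.create());
    // The new 3-arg addContent writes rows 'aaa', 'aab', ... into f:q.
    addContent(region, Bytes.toBytes("f"), Bytes.toBytes("q"));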
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksScanned.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksScanned.java (revision 0)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksScanned.java (revision 0)
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.BlockMetricType;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@SuppressWarnings("deprecation")
+@Category(SmallTests.class)
+public class TestBlocksScanned extends HBaseTestCase {
+ private static byte [] TABLE = Bytes.toBytes("TestBlocksScanned");
+ private static byte [] FAMILY = Bytes.toBytes("family");
+ private static byte [] COL = Bytes.toBytes("col");
+ private static byte [] START_KEY = Bytes.toBytes("aaa");
+ private static byte [] END_KEY = Bytes.toBytes("zzz");
+ private static int BLOCK_SIZE = 70;
+
+ private static HBaseTestingUtility TEST_UTIL = null;
+ private static HTableDescriptor TESTTABLEDESC = null;
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ SchemaMetrics.setUseTableNameInTest(true);
+ TEST_UTIL = new HBaseTestingUtility();
+ TESTTABLEDESC = new HTableDescriptor(TABLE);
+
+ TESTTABLEDESC.addFamily(
+ new HColumnDescriptor(FAMILY)
+ .setMaxVersions(10)
+ .setBlockCacheEnabled(true)
+ .setBlocksize(BLOCK_SIZE)
+ .setCompressionType(Compression.Algorithm.NONE)
+ );
+ }
+
+ @Test
+ public void testBlocksScanned() throws Exception {
+ HRegion r = createNewHRegion(TESTTABLEDESC, START_KEY, END_KEY,
+ TEST_UTIL.getConfiguration());
+ addContent(r, FAMILY, COL);
+ r.flushcache();
+
+ // Get the per-cf metrics
+ SchemaMetrics schemaMetrics =
+ SchemaMetrics.getInstance(Bytes.toString(TABLE), Bytes.toString(FAMILY));
+ Map<String, Long> schemaMetricSnapshot = SchemaMetrics.getMetricsSnapshot();
+
+ // Scan the first rows only, ["aaa", "aaz").
+ Scan scan = new Scan(Bytes.toBytes("aaa"), Bytes.toBytes("aaz"));
+ scan.addColumn(FAMILY, COL);
+ scan.setMaxVersions(1);
+
+ InternalScanner s = r.getScanner(scan);
+ List<KeyValue> results = new ArrayList<KeyValue>();
+ while (s.next(results));
+ s.close();
+
+ int expectResultSize = 'z' - 'a';
+ Assert.assertEquals(expectResultSize, results.size());
+
+ int kvPerBlock = (int) Math.ceil(BLOCK_SIZE / (double) results.get(0).getLength());
+ Assert.assertEquals(2, kvPerBlock);
+
+ long expectDataBlockRead = (long) Math.ceil(expectResultSize / (double) kvPerBlock);
+ long expectIndexBlockRead = expectDataBlockRead;
+
+ verifyDataAndIndexBlockRead(schemaMetricSnapshot, schemaMetrics,
+ expectDataBlockRead, expectIndexBlockRead);
+ }
+
+ private void verifyDataAndIndexBlockRead(Map<String, Long> previousMetricSnapshot,
+ SchemaMetrics schemaMetrics, long expectDataBlockRead, long expectedIndexBlockRead) {
+ Map<String, Long> currentMetricsSnapshot = SchemaMetrics.getMetricsSnapshot();
+ Map<String, Long> diffs =
+ SchemaMetrics.diffMetrics(previousMetricSnapshot, currentMetricsSnapshot);
+
+ long dataBlockRead = SchemaMetrics.getLong(diffs,
+ schemaMetrics.getBlockMetricName(BlockCategory.DATA, false, BlockMetricType.READ_COUNT));
+ long indexBlockRead = SchemaMetrics.getLong(diffs,
+ schemaMetrics.getBlockMetricName(BlockCategory.INDEX, false, BlockMetricType.READ_COUNT));
+
+ Assert.assertEquals(expectDataBlockRead, dataBlockRead);
+ Assert.assertEquals(expectedIndexBlockRead, indexBlockRead);
+ }
+}
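To make the expected counts in testBlocksScanned concrete: the scan over ["aaa", "aaz") returns 'z' - 'a' = 25 KeyValues, and the asserted kvPerBlock of 2 implies each KeyValue occupies roughly 35 bytes against the 70-byte block size, so the read should touch ceil(25 / 2) = 13 data blocks; the test expects the same number of index-block reads. A hedged recomputation (the 35-byte figure is inferred from the assertion, not stated in the test):

    int expectResultSize = 'z' - 'a';             // 25 rows: "aaa" .. "aay"
    int kvPerBlock = (int) Math.ceil(70 / 35.0);  // 2 KVs fit in one 70-byte block
    long expectDataBlockRead =
        (long) Math.ceil(expectResultSize / (double) kvPerBlock);  // ceil(25/2) = 13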
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java (revision 1343115)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java (working copy)
@@ -22,6 +22,8 @@
import java.util.ArrayList;
import java.util.List;
+import junit.framework.Assert;
+
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -87,7 +89,7 @@
String value = valueList.get(i);
long start = System.nanoTime();
scanner.reseekTo(Bytes.toBytes(key));
- assertEquals(value, scanner.getValueString());
+ assertEquals("i is " + i, value, scanner.getValueString());
}
reader.close();
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (revision 1343115)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java (working copy)
@@ -380,8 +380,8 @@
// Now test we can get the offset and the on-disk-size using a
// higher-level API function.s
boolean locateBlockResult =
- BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
- searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR);
+ (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
+ searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR) != -1);
if (i == 0) {
assertFalse(locateBlockResult);
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java (revision 1343115)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java (working copy)
@@ -98,6 +98,89 @@
reader.close();
}
+ public void testSeekBeforeWithReSeekTo() throws Exception {
+ Path p = makeNewFile();
+ HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf));
+ reader.loadFileInfo();
+ HFileScanner scanner = reader.getScanner(false, true);
+ assertEquals(false, scanner.seekBefore(toKV("a").getKey()));
+ assertEquals(false, scanner.seekBefore(toKV("b").getKey()));
+ assertEquals(false, scanner.seekBefore(toKV("c").getKey()));
+
+ // seekBefore d, so the scanner points to c
+ assertEquals(true, scanner.seekBefore(toKV("d").getKey()));
+ assertEquals("c", toRowStr(scanner.getKeyValue()));
+ // reseekTo e and g
+ assertEquals(0, scanner.reseekTo(toKV("c").getKey()));
+ assertEquals("c", toRowStr(scanner.getKeyValue()));
+ assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore e, so the scanner points to c
+ assertEquals(true, scanner.seekBefore(toKV("e").getKey()));
+ assertEquals("c", toRowStr(scanner.getKeyValue()));
+ // reseekTo e and g
+ assertEquals(0, scanner.reseekTo(toKV("e").getKey()));
+ assertEquals("e", toRowStr(scanner.getKeyValue()));
+ assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore f, so the scanner points to e
+ assertEquals(true, scanner.seekBefore(toKV("f").getKey()));
+ assertEquals("e", toRowStr(scanner.getKeyValue()));
+ // reseekTo e and g
+ assertEquals(0, scanner.reseekTo(toKV("e").getKey()));
+ assertEquals("e", toRowStr(scanner.getKeyValue()));
+ assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore g, so the scanner points to e
+ assertEquals(true, scanner.seekBefore(toKV("g").getKey()));
+ assertEquals("e", toRowStr(scanner.getKeyValue()));
+ // reseekTo e and g again
+ assertEquals(0, scanner.reseekTo(toKV("e").getKey()));
+ assertEquals("e", toRowStr(scanner.getKeyValue()));
+ assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore h, so the scanner points to g
+ assertEquals(true, scanner.seekBefore(toKV("h").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+ // reseekTo g
+ assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore i, so the scanner points to g
+ assertEquals(true, scanner.seekBefore(toKV("i").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+ // reseekTo g
+ assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+ assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore j, so the scanner points to i
+ assertEquals(true, scanner.seekBefore(toKV("j").getKey()));
+ assertEquals("i", toRowStr(scanner.getKeyValue()));
+ // reseekTo i
+ assertEquals(0, scanner.reseekTo(toKV("i").getKey()));
+ assertEquals("i", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore k, so the scanner points to i
+ assertEquals(true, scanner.seekBefore(toKV("k").getKey()));
+ assertEquals("i", toRowStr(scanner.getKeyValue()));
+ // reseekTo i and k
+ assertEquals(0, scanner.reseekTo(toKV("i").getKey()));
+ assertEquals("i", toRowStr(scanner.getKeyValue()));
+ assertEquals(0, scanner.reseekTo(toKV("k").getKey()));
+ assertEquals("k", toRowStr(scanner.getKeyValue()));
+
+ // seekBefore l, so the scanner points to k
+ assertEquals(true, scanner.seekBefore(toKV("l").getKey()));
+ assertEquals("k", toRowStr(scanner.getKeyValue()));
+ // reseekTo k
+ assertEquals(0, scanner.reseekTo(toKV("k").getKey()));
+ assertEquals("k", toRowStr(scanner.getKeyValue()));
+ }
+
public void testSeekTo() throws Exception {
Path p = makeNewFile();
HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf));
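As the assertions in testSeekBeforeWithReSeekTo above imply, makeNewFile() writes exactly the row keys 'c', 'e', 'g', 'i' and 'k'. The contract being exercised, in brief:

    // seekBefore(k): position on the greatest key strictly less than k;
    // returns false when no such key exists (hence 'a', 'b', 'c' above).
    // reseekTo(k): move only forward from the current position; returns 0
    // exactly when k is found. For example:
    scanner.seekBefore(toKV("f").getKey());   // scanner now points at "e"
    scanner.reseekTo(toKV("g").getKey());     // forward seek lands on "g"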
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/HConstants.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1343115)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy)
@@ -667,6 +667,12 @@
public static final String LOAD_BALANCER_SLOP_KEY = "hbase.regions.slop";
+ /**
+ * Sentinel byte array for NO_NEXT_INDEXED_KEY;
+ * the actual content is irrelevant because it is always compared by reference.
+ */
+ public static final byte [] NO_NEXT_INDEXED_KEY = Bytes.toBytes("NO_NEXT_INDEXED_KEY");
+
private HConstants() {
// Can't be instantiated with this ctor.
}
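Because NO_NEXT_INDEXED_KEY is always compared by reference, callers test it with ==, never with Bytes.equals(); a short illustration (the variable name is illustrative):

    byte[] nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
    if (nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY) {
      // The current data block is the last one; no further index lookup.
    }
    // Bytes.equals() could also match a genuine key that happens to contain
    // the same bytes, which is exactly why reference equality is used.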
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockWithScanInfo.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockWithScanInfo.java (revision 0)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockWithScanInfo.java (revision 0)
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+/**
+ * BlockWithScanInfo is a wrapper class around an HFileBlock plus additional scan attributes.
+ * These attributes are much cheaper to maintain in each caller thread than in HFileBlock itself.
+ */
+public class BlockWithScanInfo {
+ private final HFileBlock hFileBlock;
+ /**
+ * The first key of the next block following this one in the HFile.
+ * If the current block is the last one, this is reference-equal to HConstants.NO_NEXT_INDEXED_KEY.
+ */
+ private final byte[] nextIndexedKey;
+
+ public BlockWithScanInfo(HFileBlock hFileBlock, byte[] nextIndexedKey) {
+ this.hFileBlock = hFileBlock;
+ this.nextIndexedKey = nextIndexedKey;
+ }
+
+ public HFileBlock getHFileBlock() {
+ return hFileBlock;
+ }
+
+ public byte[] getNextIndexedKey() {
+ return nextIndexedKey;
+ }
+}
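A hedged sketch of the intended call pattern (the actual reader-side wiring appears in the HFileReaderV2 changes below):

    // Illustrative only: unpack the block/scan-info pair from an index lookup.
    BlockWithScanInfo info = indexReader.loadDataBlockWithScanInfo(
        key, offset, length, curBlock, cacheBlocks, pread, isCompaction);
    if (info != null && info.getHFileBlock() != null) {
      HFileBlock seekToBlock = info.getHFileBlock();
      byte[] nextIndexedKey = info.getNextIndexedKey(); // a key, or the sentinel
    }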
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (revision 1343115)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java (working copy)
@@ -30,6 +30,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.fs.HFileSystem;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
@@ -431,6 +432,15 @@
extends AbstractHFileReader.Scanner {
protected HFileBlock block;
+ /**
+ * Tracks the first indexed key of the next data block.
+ * If nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, the current data block
+ * is the last data block in the file.
+ *
+ * If nextIndexedKey is null, it has not been loaded yet.
+ */
+ protected byte[] nextIndexedKey;
+
public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks,
final boolean pread, final boolean isCompaction) {
super(r, cacheBlocks, pread, isCompaction);
@@ -454,19 +464,20 @@
throws IOException {
HFileBlockIndex.BlockIndexReader indexReader =
reader.getDataBlockIndexReader();
- HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length,
- block, cacheBlocks, pread, isCompaction);
- if (seekToBlock == null) {
+ BlockWithScanInfo blockWithScanInfo =
+ indexReader.loadDataBlockWithScanInfo(key, offset, length, block,
+ cacheBlocks, pread, isCompaction);
+ if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
// This happens if the key e.g. falls before the beginning of the file.
return -1;
}
- return loadBlockAndSeekToKey(seekToBlock, rewind, key, offset, length,
- false);
+ return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
+ blockWithScanInfo.getNextIndexedKey(), rewind, key, offset, length, false);
}
protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock);
- protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock,
+ protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey,
boolean rewind, byte[] key, int offset, int length, boolean seekBefore)
throws IOException;
@@ -479,17 +490,28 @@
@Override
public int reseekTo(byte[] key, int offset, int length) throws IOException {
+ int compared;
if (isSeeked()) {
ByteBuffer bb = getKey();
- int compared = reader.getComparator().compare(key, offset,
+ compared = reader.getComparator().compare(key, offset,
length, bb.array(), bb.arrayOffset(), bb.limit());
if (compared < 1) {
// If the required key is less than or equal to current key, then
// don't do anything.
return compared;
+ } else {
+ if (this.nextIndexedKey != null &&
+ (this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY ||
+ reader.getComparator().compare(key, offset, length,
+ nextIndexedKey, 0, nextIndexedKey.length) < 0)) {
+ // The reader shall continue to scan the current data block instead of querying the
+ // block index as long as it knows the target key is strictly smaller than
+ // the next indexed key or the current data block is the last data block.
+ return loadBlockAndSeekToKey(this.block, this.nextIndexedKey,
+ false, key, offset, length, false);
+ }
}
}
-
// Don't rewind on a reseek operation, because reseek implies that we are
// always going forward in the file.
return seekTo(key, offset, length, false);
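The fast path added to reseekTo above can be summarized as follows; this is a sketch of the decision, not the literal method body:

    // Assumes the scanner is already seeked and compared > 0, i.e. we must
    // move forward. Stay inside the current block when the target key is
    // known to precede the next block's first key, or this is the last block.
    boolean withinCurrentBlock = nextIndexedKey != null
        && (nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY
            || reader.getComparator().compare(key, offset, length,
                   nextIndexedKey, 0, nextIndexedKey.length) < 0);
    if (withinCurrentBlock) {
      // Seek within the already-loaded block: no index lookup, no block read.
    } else {
      // Fall back to seekTo(), which consults the block index.
    }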
@@ -505,6 +527,7 @@
return false;
}
ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock);
+
if (reader.getComparator().compare(firstKey.array(),
firstKey.arrayOffset(), firstKey.limit(), key, offset, length) == 0)
{
@@ -521,11 +544,11 @@
seekToBlock = reader.readBlock(previousBlockOffset,
seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
pread, isCompaction, BlockType.DATA);
-
// TODO shortcut: seek forward in this block to the last key of the
// block.
}
- loadBlockAndSeekToKey(seekToBlock, true, key, offset, length, true);
+ byte[] firstKeyInCurrentBlock = Bytes.getBytes(firstKey);
+ loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, offset, length, true);
return true;
}
@@ -701,14 +724,17 @@
}
@Override
- protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind,
- byte[] key, int offset, int length, boolean seekBefore)
+ protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey,
+ boolean rewind, byte[] key, int offset, int length, boolean seekBefore)
throws IOException {
if (block == null || block.getOffset() != seekToBlock.getOffset()) {
updateCurrBlock(seekToBlock);
} else if (rewind) {
blockBuffer.rewind();
}
+
+ // Update the nextIndexedKey
+ this.nextIndexedKey = nextIndexedKey;
return blockSeek(key, offset, length, seekBefore);
}
@@ -733,6 +759,9 @@
blockBuffer = block.getBufferWithoutHeader();
readKeyValueLen();
blockFetches++;
+
+ // Reset the next indexed key
+ this.nextIndexedKey = null;
}
private final void readKeyValueLen() {
@@ -1018,14 +1047,15 @@
}
@Override
- protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind,
- byte[] key, int offset, int length, boolean seekBefore)
+ protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey,
+ boolean rewind, byte[] key, int offset, int length, boolean seekBefore)
throws IOException {
if (block == null || block.getOffset() != seekToBlock.getOffset()) {
updateCurrentBlock(seekToBlock);
} else if (rewind) {
seeker.rewind();
}
+ this.nextIndexedKey = nextIndexedKey;
return seeker.seekToKeyInBlock(key, offset, length, seekBefore);
}
}
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (revision 1343115)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java (working copy)
@@ -37,6 +37,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -178,16 +179,56 @@
int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
boolean pread, boolean isCompaction)
throws IOException {
+ BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, keyOffset, keyLength,
+ currentBlock, cacheBlocks, pread, isCompaction);
+ if (blockWithScanInfo == null) {
+ return null;
+ } else {
+ return blockWithScanInfo.getHFileBlock();
+ }
+ }
+
+ /**
+ * Return a BlockWithScanInfo, which bundles the data block with scan info
+ * such as the nextIndexedKey.
+ * This function is only called when the HFile version is greater than 1.
+ *
+ * @param key the key we are looking for
+ * @param keyOffset the offset of the key in its byte array
+ * @param keyLength the length of the key
+ * @param currentBlock the current block, to avoid re-reading the same
+ * block
+ * @param cacheBlocks
+ * @param pread
+ * @param isCompaction
+ * @return a BlockWithScanInfo bundling the data block with scan info
+ * such as the nextIndexedKey.
+ * @throws IOException
+ */
+ public BlockWithScanInfo loadDataBlockWithScanInfo(final byte[] key, int keyOffset,
+ int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
+ boolean pread, boolean isCompaction)
+ throws IOException {
int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength);
if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
return null;
}
+ // the next indexed key
+ byte[] nextIndexedKey = null;
+
// Read the next-level (intermediate or leaf) index block.
long currentOffset = blockOffsets[rootLevelIndex];
int currentOnDiskSize = blockDataSizes[rootLevelIndex];
+ if (rootLevelIndex < blockKeys.length - 1) {
+ nextIndexedKey = blockKeys[rootLevelIndex + 1];
+ } else {
+ nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
+ }
+
int lookupLevel = 1; // How many levels deep we are in our lookup.
+ int index = -1;
HFileBlock block;
while (true) {
@@ -238,8 +279,8 @@
// Locate the entry corresponding to the given key in the non-root
// (leaf or intermediate-level) index block.
ByteBuffer buffer = block.getBufferWithoutHeader();
- if (!locateNonRootIndexEntry(buffer, key, keyOffset, keyLength,
- comparator)) {
+ index = locateNonRootIndexEntry(buffer, key, keyOffset, keyLength, comparator);
+ if (index == -1) {
throw new IOException("The key "
+ Bytes.toStringBinary(key, keyOffset, keyLength)
+ " is before the" + " first key of the non-root index block "
@@ -248,6 +289,12 @@
currentOffset = buffer.getLong();
currentOnDiskSize = buffer.getInt();
+
+ // Only update the next indexed key if there is a later entry at this level;
+ // otherwise keep the bound inherited from the level above.
+ byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
+ if (tmpNextIndexedKey != null) {
+ nextIndexedKey = tmpNextIndexedKey;
+ }
}
if (lookupLevel != searchTreeLevel) {
@@ -255,7 +302,9 @@
" but the number of levels is " + searchTreeLevel);
}
- return block;
+ // set the next indexed key for the current block.
+ BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey);
+ return blockWithScanInfo;
}
/**
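The bookkeeping in the hunks above maintains an upper bound on every key reachable through the chosen index entry; condensed, with the reasoning as comments:

    // Start from the root: the next root-level key bounds everything below
    // this root entry; if there is none, the sentinel marks the last block.
    byte[] nextIndexedKey = (rootLevelIndex < blockKeys.length - 1)
        ? blockKeys[rootLevelIndex + 1]
        : HConstants.NO_NEXT_INDEXED_KEY;
    // At each non-root level, the sibling entry at index + 1 (if present)
    // starts strictly after everything under entry 'index', so it is a
    // tighter bound than the one inherited from the parent level. When the
    // chosen entry is the last at its level, the inherited bound still holds.
    byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
    if (tmpNextIndexedKey != null) {
      nextIndexedKey = tmpNextIndexedKey;
    }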
@@ -381,6 +430,41 @@
}
/**
+ * The indexed key at the ith position in the nonRootIndex. The position starts at 0.
+ * @param nonRootIndex
+ * @param i the ith position
+ * @return The indexed key at the ith position in the nonRootIndex.
+ */
+ private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
+ int numEntries = nonRootIndex.getInt(0);
+ if (i < 0 || i >= numEntries) {
+ return null;
+ }
+
+ // Entries start after the number of entries and the secondary index.
+ // The secondary index takes numEntries + 1 ints.
+ int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
+ // Target key's offset relative to the end of the secondary index
+ int targetKeyRelOffset = nonRootIndex.getInt(
+ Bytes.SIZEOF_INT * (i + 1));
+
+ // The offset of the target key in the blockIndex buffer
+ int targetKeyOffset = entriesOffset // Skip secondary index
+ + targetKeyRelOffset // Skip all entries before position i
+ + SECONDARY_INDEX_ENTRY_OVERHEAD; // Skip offset and on-disk-size
+
+ // We subtract the two consecutive secondary index elements, which
+ // gives us the size of the whole (offset, onDiskSize, key) tuple. We
+ // then need to subtract the overhead of offset and onDiskSize.
+ int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
+ targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;
+
+ int from = nonRootIndex.arrayOffset() + targetKeyOffset;
+ int to = from + targetKeyLength;
+ return Arrays.copyOfRange(nonRootIndex.array(), from, to);
+ }
+
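To make the offset arithmetic in getNonRootIndexedKey concrete, here is the non-root block layout the method assumes, with a small worked example (the entry sizes are made up for illustration):

    // Layout of a non-root index block, after the block header:
    //   int   numEntries
    //   int   secondaryIndex[numEntries + 1]  // entry offsets relative to the
    //                                         // end of the secondary index
    //   (long offset, int onDiskSize, byte[] firstKey) x numEntries
    //
    // Example: numEntries = 3, SECONDARY_INDEX_ENTRY_OVERHEAD = 12 (long+int),
    // secondaryIndex = {0, 22, 45, 70}. Then entriesOffset = 4 * (3 + 2) = 20.
    // For i = 1: targetKeyRelOffset = 22, so the key starts at buffer offset
    // 20 + 22 + 12 = 54 and its length is 45 - 22 - 12 = 11 bytes.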
+ /**
* Performs a binary search over a non-root level index block. Utilizes the
* secondary index, which records the offsets of (offset, onDiskSize,
* firstKey) tuples of all entries.
@@ -480,31 +564,30 @@
* @param key the byte array containing the key
* @param keyOffset the offset of the key in its byte array
* @param keyLength the length of the key
- * @return true in the case the index entry containing the given key was
- * found, false in the case the given key is before the first key
+ * @return the index of the entry that contains the given key,
+ * or -1 if the given key falls before the first key of the block.
*
*/
- static boolean locateNonRootIndexEntry(ByteBuffer nonRootBlock, byte[] key,
+ static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, byte[] key,
int keyOffset, int keyLength, RawComparator comparator) {
int entryIndex = binarySearchNonRootIndex(key, keyOffset, keyLength,
nonRootBlock, comparator);
- if (entryIndex == -1) {
- return false;
- }
+ if (entryIndex != -1) {
+ int numEntries = nonRootBlock.getInt(0);
- int numEntries = nonRootBlock.getInt(0);
+ // The end of secondary index and the beginning of entries themselves.
+ int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
- // The end of secondary index and the beginning of entries themselves.
- int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
+ // The offset of the entry we are interested in relative to the end of
+ // the secondary index.
+ int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT
+ * (1 + entryIndex));
- // The offset of the entry we are interested in relative to the end of
- // the secondary index.
- int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT
- * (1 + entryIndex));
+ nonRootBlock.position(entriesOffset + entryRelOffset);
+ }
- nonRootBlock.position(entriesOffset + entryRelOffset);
- return true;
+ return entryIndex;
}
/**