Index: src/main/java/org/apache/hadoop/hbase/HConstants.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/HConstants.java	(revision 1399102)
+++ src/main/java/org/apache/hadoop/hbase/HConstants.java	(working copy)
@@ -79,7 +79,7 @@
   /** Cluster is fully-distributed */
   public static final boolean CLUSTER_IS_DISTRIBUTED = true;
 
-  /** Default value for cluster distributed mode */ 
+  /** Default value for cluster distributed mode */
   public static final boolean DEFAULT_CLUSTER_DISTRIBUTED = CLUSTER_IS_LOCAL;
 
   /** default host address */
@@ -187,13 +187,13 @@
   /** Default value for thread wake frequency */
   public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000;
-  
+
   /** Parameter name for how often we should try to write a version file, before failing */
   public static final String VERSION_FILE_WRITE_ATTEMPTS = "hbase.server.versionfile.writeattempts";
 
   /** Parameter name for how often we should try to write a version file, before failing */
   public static final int DEFAULT_VERSION_FILE_WRITE_ATTEMPTS = 3;
-  
+
   /** Parameter name for how often a region should should perform a major compaction */
   public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction";
 
@@ -618,10 +618,10 @@
   /** File permission umask to use when creating hbase data files */
   public static final String DATA_FILE_UMASK_KEY = "hbase.data.umask";
 
-  /** 
+  /**
    * If this parameter is set to true, then hbase will read
-   * data and then verify checksums. Checksum verification 
-   * inside hdfs will be switched off. However, if the hbase-checksum 
+   * data and then verify checksums. Checksum verification
+   * inside hdfs will be switched off. However, if the hbase-checksum
    * verification fails, then it will switch back to using
    * hdfs checksums for verifiying data that is being read from storage.
    *
@@ -629,7 +629,7 @@
    * verify any checksums, instead it will depend on checksum verification
    * being done in the hdfs client.
    */
-  public static final String HBASE_CHECKSUM_VERIFICATION = 
+  public static final String HBASE_CHECKSUM_VERIFICATION =
       "hbase.regionserver.checksum.verify";
 
   /**
@@ -661,7 +661,12 @@
   public static final int HIGH_QOS = 100;
   public static final int REPLICATION_QOS = 5; // normal_QOS < replication_QOS < high_QOS
 
-  
+  /**
+   * The byte array that represents NO_NEXT_INDEXED_KEY;
+   * the actual value is irrelevant because it is always compared by reference.
+   */
+  public static final byte [] NO_NEXT_INDEXED_KEY = Bytes.toBytes("NO_NEXT_INDEXED_KEY");
+
   private HConstants() {
     // Can't be instantiated with this ctor.
   }
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/BlockWithScanInfo.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/BlockWithScanInfo.java	(revision 0)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/BlockWithScanInfo.java	(working copy)
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+/**
+ * BlockWithScanInfo is a wrapper class for HFileBlock with other attributes. These attributes are
+ * supposed to be much cheaper to maintain in each caller thread than in HFileBlock itself.
+ */
+public class BlockWithScanInfo {
+  private final HFileBlock hFileBlock;
+  /**
+   * The first key in the next block following this one in the HFile.
+   * If this key is unknown, this is reference-equal with HConstants.NO_NEXT_INDEXED_KEY.
+   */
+  private final byte[] nextIndexedKey;
+
+  public BlockWithScanInfo(HFileBlock hFileBlock, byte[] nextIndexedKey) {
+    this.hFileBlock = hFileBlock;
+    this.nextIndexedKey = nextIndexedKey;
+  }
+
+  public HFileBlock getHFileBlock() {
+    return hFileBlock;
+  }
+
+  public byte[] getNextIndexedKey() {
+    return nextIndexedKey;
+  }
+}
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java	(revision 1399102)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileBlockIndex.java	(working copy)
@@ -36,6 +36,7 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
@@ -176,16 +177,56 @@
       int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
       boolean pread, boolean isCompaction)
       throws IOException {
+    BlockWithScanInfo blockWithScanInfo = loadDataBlockWithScanInfo(key, keyOffset, keyLength,
+        currentBlock, cacheBlocks, pread, isCompaction);
+    if (blockWithScanInfo == null) {
+      return null;
+    } else {
+      return blockWithScanInfo.getHFileBlock();
+    }
+  }
+
+  /**
+   * Return the BlockWithScanInfo, which contains the DataBlock together with other scan info
+   * such as nextIndexedKey.
+   * This function will only be called when the HFile version is larger than 1.
+   *
+   * @param key the key we are looking for
+   * @param keyOffset the offset of the key in its byte array
+   * @param keyLength the length of the key
+   * @param currentBlock the current block, to avoid re-reading the same
+   *          block
+   * @param cacheBlocks
+   * @param pread
+   * @param isCompaction
+   * @return the BlockWithScanInfo, which contains the DataBlock together with other scan info
+   *         such as nextIndexedKey.
+   * @throws IOException
+   */
+  public BlockWithScanInfo loadDataBlockWithScanInfo(final byte[] key, int keyOffset,
+      int keyLength, HFileBlock currentBlock, boolean cacheBlocks,
+      boolean pread, boolean isCompaction)
+      throws IOException {
     int rootLevelIndex = rootBlockContainingKey(key, keyOffset, keyLength);
     if (rootLevelIndex < 0 || rootLevelIndex >= blockOffsets.length) {
       return null;
     }
 
+    // the next indexed key
+    byte[] nextIndexedKey = null;
+
     // Read the next-level (intermediate or leaf) index block.
     long currentOffset = blockOffsets[rootLevelIndex];
     int currentOnDiskSize = blockDataSizes[rootLevelIndex];
 
+    if (rootLevelIndex < blockKeys.length - 1) {
+      nextIndexedKey = blockKeys[rootLevelIndex + 1];
+    } else {
+      nextIndexedKey = HConstants.NO_NEXT_INDEXED_KEY;
+    }
+
     int lookupLevel = 1; // How many levels deep we are in our lookup.
+    int index = -1;
 
     HFileBlock block;
     while (true) {
@@ -236,8 +277,8 @@
       // Locate the entry corresponding to the given key in the non-root
       // (leaf or intermediate-level) index block.
       ByteBuffer buffer = block.getBufferWithoutHeader();
-      if (!locateNonRootIndexEntry(buffer, key, keyOffset, keyLength,
-          comparator)) {
+      index = locateNonRootIndexEntry(buffer, key, keyOffset, keyLength, comparator);
+      if (index == -1) {
         throw new IOException("The key "
             + Bytes.toStringBinary(key, keyOffset, keyLength)
             + " is before the" + " first key of the non-root index block "
@@ -246,6 +287,12 @@
 
       currentOffset = buffer.getLong();
       currentOnDiskSize = buffer.getInt();
+
+      // Only update next indexed key if there is a next indexed key in the current level
+      byte[] tmpNextIndexedKey = getNonRootIndexedKey(buffer, index + 1);
+      if (tmpNextIndexedKey != null) {
+        nextIndexedKey = tmpNextIndexedKey;
+      }
     }
 
     if (lookupLevel != searchTreeLevel) {
@@ -253,7 +300,9 @@
           " but the number of levels is " + searchTreeLevel);
     }
 
-    return block;
+    // set the next indexed key for the current block.
+    BlockWithScanInfo blockWithScanInfo = new BlockWithScanInfo(block, nextIndexedKey);
+    return blockWithScanInfo;
   }
 
   /**
@@ -379,6 +428,41 @@
   }
 
   /**
+   * The indexed key at the ith position in the nonRootIndex. The position starts at 0.
+   * @param nonRootIndex
+   * @param i the ith position
+   * @return The indexed key at the ith position in the nonRootIndex.
+   */
+  private byte[] getNonRootIndexedKey(ByteBuffer nonRootIndex, int i) {
+    int numEntries = nonRootIndex.getInt(0);
+    if (i < 0 || i >= numEntries) {
+      return null;
+    }
+
+    // Entries start after the number of entries and the secondary index.
+    // The secondary index takes numEntries + 1 ints.
+    int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
+    // The target key's offset relative to the end of the secondary index
+    int targetKeyRelOffset = nonRootIndex.getInt(
+        Bytes.SIZEOF_INT * (i + 1));
+
+    // The offset of the target key in the blockIndex buffer
+    int targetKeyOffset = entriesOffset     // Skip secondary index
+        + targetKeyRelOffset                // Skip all entries until mid
+        + SECONDARY_INDEX_ENTRY_OVERHEAD;   // Skip offset and on-disk-size
+
+    // We subtract the two consecutive secondary index elements, which
+    // gives us the size of the whole (offset, onDiskSize, key) tuple. We
+    // then need to subtract the overhead of offset and onDiskSize.
+    int targetKeyLength = nonRootIndex.getInt(Bytes.SIZEOF_INT * (i + 2)) -
+        targetKeyRelOffset - SECONDARY_INDEX_ENTRY_OVERHEAD;
+
+    int from = nonRootIndex.arrayOffset() + targetKeyOffset;
+    int to = from + targetKeyLength;
+    return Arrays.copyOfRange(nonRootIndex.array(), from, to);
+  }
+
+  /**
    * Performs a binary search over a non-root level index block. Utilizes the
    * secondary index, which records the offsets of (offset, onDiskSize,
    * firstKey) tuples of all entries.
@@ -478,31 +562,30 @@
    * @param key the byte array containing the key
    * @param keyOffset the offset of the key in its byte array
    * @param keyLength the length of the key
-   * @return true in the case the index entry containing the given key was
-   *         found, false in the case the given key is before the first key
+   * @return the index position where the given key was found,
+   *         otherwise return -1 in the case the given key is before the first key.
    *
    */
-  static boolean locateNonRootIndexEntry(ByteBuffer nonRootBlock, byte[] key,
+  static int locateNonRootIndexEntry(ByteBuffer nonRootBlock, byte[] key,
       int keyOffset, int keyLength, RawComparator comparator) {
     int entryIndex = binarySearchNonRootIndex(key, keyOffset, keyLength,
         nonRootBlock, comparator);
 
-    if (entryIndex == -1) {
-      return false;
-    }
+    if (entryIndex != -1) {
+      int numEntries = nonRootBlock.getInt(0);
 
-    int numEntries = nonRootBlock.getInt(0);
+      // The end of secondary index and the beginning of entries themselves.
+      int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
 
-    // The end of secondary index and the beginning of entries themselves.
-    int entriesOffset = Bytes.SIZEOF_INT * (numEntries + 2);
+      // The offset of the entry we are interested in relative to the end of
+      // the secondary index.
+      int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT
+          * (1 + entryIndex));
 
-    // The offset of the entry we are interested in relative to the end of
-    // the secondary index.
-    int entryRelOffset = nonRootBlock.getInt(Bytes.SIZEOF_INT
-        * (1 + entryIndex));
+      nonRootBlock.position(entriesOffset + entryRelOffset);
+    }
 
-    nonRootBlock.position(entriesOffset + entryRelOffset);
-    return true;
+    return entryIndex;
   }
 
   /**
Index: src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java	(revision 1399102)
+++ src/main/java/org/apache/hadoop/hbase/io/hfile/HFileReaderV2.java	(working copy)
@@ -29,6 +29,7 @@
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.fs.HFileSystem;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoder;
@@ -429,6 +430,15 @@
       extends AbstractHFileReader.Scanner {
     protected HFileBlock block;
 
+    /**
+     * The next indexed key keeps track of the indexed key of the next data block.
+     * If the nextIndexedKey is HConstants.NO_NEXT_INDEXED_KEY, it means that the
+     * current data block is the last data block.
+     *
+     * If the nextIndexedKey is null, it means the nextIndexedKey has not been loaded yet.
+     */
+    protected byte[] nextIndexedKey;
+
     public AbstractScannerV2(HFileReaderV2 r, boolean cacheBlocks,
         final boolean pread, final boolean isCompaction) {
       super(r, cacheBlocks, pread, isCompaction);
@@ -452,19 +462,20 @@
         throws IOException {
       HFileBlockIndex.BlockIndexReader indexReader =
           reader.getDataBlockIndexReader();
-      HFileBlock seekToBlock = indexReader.seekToDataBlock(key, offset, length,
-          block, cacheBlocks, pread, isCompaction);
-      if (seekToBlock == null) {
+      BlockWithScanInfo blockWithScanInfo =
+          indexReader.loadDataBlockWithScanInfo(key, offset, length, block,
+              cacheBlocks, pread, isCompaction);
+      if (blockWithScanInfo == null || blockWithScanInfo.getHFileBlock() == null) {
         // This happens if the key e.g. falls before the beginning of the file.
         return -1;
       }
-      return loadBlockAndSeekToKey(seekToBlock, rewind, key, offset, length,
-          false);
+      return loadBlockAndSeekToKey(blockWithScanInfo.getHFileBlock(),
+          blockWithScanInfo.getNextIndexedKey(), rewind, key, offset, length, false);
     }
 
     protected abstract ByteBuffer getFirstKeyInBlock(HFileBlock curBlock);
 
-    protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock,
+    protected abstract int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey,
         boolean rewind, byte[] key, int offset, int length, boolean seekBefore)
         throws IOException;
 
@@ -477,17 +488,28 @@
 
     @Override
     public int reseekTo(byte[] key, int offset, int length) throws IOException {
+      int compared;
       if (isSeeked()) {
         ByteBuffer bb = getKey();
-        int compared = reader.getComparator().compare(key, offset,
+        compared = reader.getComparator().compare(key, offset,
             length, bb.array(), bb.arrayOffset(), bb.limit());
         if (compared < 1) {
           // If the required key is less than or equal to current key, then
           // don't do anything.
           return compared;
+        } else {
+          if (this.nextIndexedKey != null &&
+              (this.nextIndexedKey == HConstants.NO_NEXT_INDEXED_KEY ||
+               reader.getComparator().compare(key, offset, length,
+                   nextIndexedKey, 0, nextIndexedKey.length) < 0)) {
+            // The reader shall continue to scan the current data block instead of querying the
+            // block index as long as it knows the target key is strictly smaller than
+            // the next indexed key or the current data block is the last data block.
+            return loadBlockAndSeekToKey(this.block, this.nextIndexedKey,
+                false, key, offset, length, false);
+          }
         }
       }
-
       // Don't rewind on a reseek operation, because reseek implies that we are
       // always going forward in the file.
       return seekTo(key, offset, length, false);
@@ -503,6 +525,7 @@
         return false;
       }
       ByteBuffer firstKey = getFirstKeyInBlock(seekToBlock);
+
       if (reader.getComparator().compare(firstKey.array(),
           firstKey.arrayOffset(), firstKey.limit(), key, offset, length) == 0)
       {
@@ -519,11 +542,11 @@
         seekToBlock = reader.readBlock(previousBlockOffset,
             seekToBlock.getOffset() - previousBlockOffset, cacheBlocks,
             pread, isCompaction, BlockType.DATA);
-
         // TODO shortcut: seek forward in this block to the last key of the
         // block.
       }
-      loadBlockAndSeekToKey(seekToBlock, true, key, offset, length, true);
+      byte[] firstKeyInCurrentBlock = Bytes.getBytes(firstKey);
+      loadBlockAndSeekToKey(seekToBlock, firstKeyInCurrentBlock, true, key, offset, length, true);
       return true;
     }
 
@@ -701,14 +724,17 @@
     }
 
     @Override
-    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind,
-        byte[] key, int offset, int length, boolean seekBefore)
+    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey,
+        boolean rewind, byte[] key, int offset, int length, boolean seekBefore)
         throws IOException {
       if (block == null || block.getOffset() != seekToBlock.getOffset()) {
         updateCurrBlock(seekToBlock);
       } else if (rewind) {
         blockBuffer.rewind();
       }
+
+      // Update the nextIndexedKey
+      this.nextIndexedKey = nextIndexedKey;
       return blockSeek(key, offset, length, seekBefore);
     }
 
@@ -733,6 +759,9 @@
       blockBuffer = block.getBufferWithoutHeader();
       readKeyValueLen();
       blockFetches++;
+
+      // Reset the next indexed key
+      this.nextIndexedKey = null;
     }
 
     private final void readKeyValueLen() {
@@ -1018,14 +1047,15 @@
     }
 
     @Override
-    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, boolean rewind,
-        byte[] key, int offset, int length, boolean seekBefore)
+    protected int loadBlockAndSeekToKey(HFileBlock seekToBlock, byte[] nextIndexedKey,
+        boolean rewind, byte[] key, int offset, int length, boolean seekBefore)
         throws IOException {
       if (block == null || block.getOffset() != seekToBlock.getOffset()) {
         updateCurrentBlock(seekToBlock);
       } else if (rewind) {
         seeker.rewind();
       }
+      this.nextIndexedKey = nextIndexedKey;
       return seeker.seekToKeyInBlock(key, offset, length, seekBefore);
     }
   }
Index: src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java	(revision 1399102)
+++ src/test/java/org/apache/hadoop/hbase/HBaseTestCase.java	(working copy)
@@ -71,7 +71,7 @@
   protected static final byte [][] COLUMNS = {fam1, fam2, fam3};
 
   private boolean localfs = false;
-  protected Path testDir = null;
+  protected static Path testDir = null;
   protected FileSystem fs = null;
   protected HRegion root = null;
   protected HRegion meta = null;
@@ -180,9 +180,15 @@
    * @return An {@link HRegion}
    * @throws IOException
    */
-  protected HRegion createNewHRegion(HTableDescriptor desc, byte [] startKey,
+  public HRegion createNewHRegion(HTableDescriptor desc, byte [] startKey,
       byte [] endKey)
   throws IOException {
+    return createNewHRegion(desc, startKey, endKey, this.conf);
+  }
+
+  public HRegion createNewHRegion(HTableDescriptor desc, byte [] startKey,
+      byte [] endKey, Configuration conf)
+  throws IOException {
     FileSystem filesystem = FileSystem.get(conf);
     HRegionInfo hri = new HRegionInfo(desc.getName(), startKey, endKey);
     return HRegion.createHRegion(hri, testDir, conf, desc);
@@ -248,10 +254,11 @@
    * Adds data of the from 'aaa', 'aab', etc where key and value are the same.
    * @param r
    * @param columnFamily
+   * @param column
    * @throws IOException
   * @return count of what we added.
    */
-  protected static long addContent(final HRegion r, final byte [] columnFamily)
+  public static long addContent(final HRegion r, final byte [] columnFamily, final byte[] column)
   throws IOException {
     byte [] startKey = r.getRegionInfo().getStartKey();
     byte [] endKey = r.getRegionInfo().getEndKey();
@@ -259,7 +266,7 @@
     if (startKeyBytes == null || startKeyBytes.length == 0) {
       startKeyBytes = START_KEY_BYTES;
     }
-    return addContent(new HRegionIncommon(r), Bytes.toString(columnFamily), null,
+    return addContent(new HRegionIncommon(r), Bytes.toString(columnFamily), Bytes.toString(column),
       startKeyBytes, endKey, -1);
   }
 
@@ -267,18 +274,32 @@
    * Add content to region r on the passed column
    * column.
    * Adds data of the from 'aaa', 'aab', etc where key and value are the same.
+   * @param r
+   * @param columnFamily
+   * @throws IOException
+   * @return count of what we added.
+   */
+  protected static long addContent(final HRegion r, final byte [] columnFamily)
+  throws IOException {
+    return addContent(r, columnFamily, null);
+  }
+
+  /**
+   * Add content to region r on the passed column
+   * column.
+   * Adds data of the from 'aaa', 'aab', etc where key and value are the same.
    * @param updater An instance of {@link Incommon}.
    * @param columnFamily
    * @throws IOException
   * @return count of what we added.
    */
   protected static long addContent(final Incommon updater,
-    final String columnFamily) throws IOException {
+      final String columnFamily) throws IOException {
     return addContent(updater, columnFamily, START_KEY_BYTES, null);
   }
 
   protected static long addContent(final Incommon updater, final String family,
-    final String column) throws IOException {
+      final String column) throws IOException {
     return addContent(updater, family, column, START_KEY_BYTES, null);
   }
Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java	(revision 1399102)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestHFileBlockIndex.java	(working copy)
@@ -380,8 +380,8 @@
       // Now test we can get the offset and the on-disk-size using a
       // higher-level API function.s
       boolean locateBlockResult =
-        BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
-            searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR);
+        (BlockIndexReader.locateNonRootIndexEntry(nonRootIndex, arrayHoldingKey,
+            searchKey.length / 2, searchKey.length, Bytes.BYTES_RAWCOMPARATOR) != -1);
 
       if (i == 0) {
         assertFalse(locateBlockResult);
Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java	(revision 1399102)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestReseekTo.java	(working copy)
@@ -22,6 +22,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import junit.framework.Assert;
+
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
@@ -87,7 +89,7 @@
       String value = valueList.get(i);
       long start = System.nanoTime();
       scanner.reseekTo(Bytes.toBytes(key));
-      assertEquals(value, scanner.getValueString());
+      assertEquals("i is " + i, value, scanner.getValueString());
     }
 
     reader.close();
Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java	(revision 1399102)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestSeekTo.java	(working copy)
@@ -98,6 +98,89 @@
     reader.close();
   }
 
+  public void testSeekBeforeWithReSeekTo() throws Exception {
+    Path p = makeNewFile();
+    HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf));
+    reader.loadFileInfo();
+    HFileScanner scanner = reader.getScanner(false, true);
+    assertEquals(false, scanner.seekBefore(toKV("a").getKey()));
+    assertEquals(false, scanner.seekBefore(toKV("b").getKey()));
+    assertEquals(false, scanner.seekBefore(toKV("c").getKey()));
+
+    // seekBefore d, so the scanner points to c
+    assertEquals(true, scanner.seekBefore(toKV("d").getKey()));
+    assertEquals("c", toRowStr(scanner.getKeyValue()));
+    // reseekTo c and g
+    assertEquals(0, scanner.reseekTo(toKV("c").getKey()));
+    assertEquals("c", toRowStr(scanner.getKeyValue()));
+    assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore e, so the scanner points to c
+    assertEquals(true, scanner.seekBefore(toKV("e").getKey()));
+    assertEquals("c", toRowStr(scanner.getKeyValue()));
+    // reseekTo e and g
+    assertEquals(0, scanner.reseekTo(toKV("e").getKey()));
+    assertEquals("e", toRowStr(scanner.getKeyValue()));
+    assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore f, so the scanner points to e
+    assertEquals(true, scanner.seekBefore(toKV("f").getKey()));
+    assertEquals("e", toRowStr(scanner.getKeyValue()));
+    // reseekTo e and g
+    assertEquals(0, scanner.reseekTo(toKV("e").getKey()));
+    assertEquals("e", toRowStr(scanner.getKeyValue()));
+    assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore g, so the scanner points to e
+    assertEquals(true, scanner.seekBefore(toKV("g").getKey()));
+    assertEquals("e", toRowStr(scanner.getKeyValue()));
+    // reseekTo e and g again
+    assertEquals(0, scanner.reseekTo(toKV("e").getKey()));
+    assertEquals("e", toRowStr(scanner.getKeyValue()));
+    assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore h, so the scanner points to g
+    assertEquals(true, scanner.seekBefore(toKV("h").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+    // reseekTo g
+    assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore i, so the scanner points to g
+    assertEquals(true, scanner.seekBefore(toKV("i").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+    // reseekTo g
+    assertEquals(0, scanner.reseekTo(toKV("g").getKey()));
+    assertEquals("g", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore j, so the scanner points to i
+    assertEquals(true, scanner.seekBefore(toKV("j").getKey()));
+    assertEquals("i", toRowStr(scanner.getKeyValue()));
+    // reseekTo i
+    assertEquals(0, scanner.reseekTo(toKV("i").getKey()));
+    assertEquals("i", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore k, so the scanner points to i
+    assertEquals(true, scanner.seekBefore(toKV("k").getKey()));
+    assertEquals("i", toRowStr(scanner.getKeyValue()));
+    // reseekTo i and k
+    assertEquals(0, scanner.reseekTo(toKV("i").getKey()));
+    assertEquals("i", toRowStr(scanner.getKeyValue()));
+    assertEquals(0, scanner.reseekTo(toKV("k").getKey()));
+    assertEquals("k", toRowStr(scanner.getKeyValue()));
+
+    // seekBefore l, so the scanner points to k
+    assertEquals(true, scanner.seekBefore(toKV("l").getKey()));
+    assertEquals("k", toRowStr(scanner.getKeyValue()));
+    // reseekTo k
+    assertEquals(0, scanner.reseekTo(toKV("k").getKey()));
+    assertEquals("k", toRowStr(scanner.getKeyValue()));
+  }
+
   public void testSeekTo() throws Exception {
     Path p = makeNewFile();
     HFile.Reader reader = HFile.createReader(fs, p, new CacheConfig(conf));
Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksScanned.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksScanned.java	(revision 0)
+++ src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksScanned.java	(working copy)
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hbase.HBaseTestCase;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.Compression;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.BlockMetricType;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@SuppressWarnings("deprecation")
+@Category(SmallTests.class)
+public class TestBlocksScanned extends HBaseTestCase {
+  private static byte [] TABLE = Bytes.toBytes("TestBlocksScanned");
+  private static byte [] FAMILY = Bytes.toBytes("family");
+  private static byte [] COL = Bytes.toBytes("col");
+  private static byte [] START_KEY = Bytes.toBytes("aaa");
+  private static byte [] END_KEY = Bytes.toBytes("zzz");
+  private static int BLOCK_SIZE = 70;
+
+  private static HBaseTestingUtility TEST_UTIL = null;
+  private static HTableDescriptor TESTTABLEDESC = null;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    SchemaMetrics.setUseTableNameInTest(true);
+    TEST_UTIL = new HBaseTestingUtility();
+    TESTTABLEDESC = new HTableDescriptor(TABLE);
+
+    TESTTABLEDESC.addFamily(
+        new HColumnDescriptor(FAMILY)
+            .setMaxVersions(10)
+            .setBlockCacheEnabled(true)
+            .setBlocksize(BLOCK_SIZE)
+            .setCompressionType(Compression.Algorithm.NONE)
+    );
+  }
+
+  @Test
+  public void testBlocksScanned() throws Exception {
+    HRegion r = createNewHRegion(TESTTABLEDESC, START_KEY, END_KEY,
+        TEST_UTIL.getConfiguration());
+    addContent(r, FAMILY, COL);
+    r.flushcache();
+
+    // Get the per-cf metrics
+    SchemaMetrics schemaMetrics =
+        SchemaMetrics.getInstance(Bytes.toString(TABLE), Bytes.toString(FAMILY));
+    Map<String, Long> schemaMetricSnapshot = SchemaMetrics.getMetricsSnapshot();
+
+    // Do simple test of getting one row only first.
+    Scan scan = new Scan(Bytes.toBytes("aaa"), Bytes.toBytes("aaz"));
+    scan.addColumn(FAMILY, COL);
+    scan.setMaxVersions(1);
+
+    InternalScanner s = r.getScanner(scan);
+    List<KeyValue> results = new ArrayList<KeyValue>();
+    while (s.next(results));
+    s.close();
+
+    int expectResultSize = 'z' - 'a';
+    Assert.assertEquals(expectResultSize, results.size());
+
+    int kvPerBlock = (int) Math.ceil(BLOCK_SIZE / (double) results.get(0).getLength());
+    Assert.assertEquals(2, kvPerBlock);
+
+    long expectDataBlockRead = (long) Math.ceil(expectResultSize / (double) kvPerBlock);
+    long expectIndexBlockRead = expectDataBlockRead;
+
+    verifyDataAndIndexBlockRead(schemaMetricSnapshot, schemaMetrics,
+        expectDataBlockRead, expectIndexBlockRead);
+  }
+
+  private void verifyDataAndIndexBlockRead(Map<String, Long> previousMetricSnapshot,
+      SchemaMetrics schemaMetrics, long expectDataBlockRead, long expectedIndexBlockRead) {
+    Map<String, Long> currentMetricsSnapshot = SchemaMetrics.getMetricsSnapshot();
+    Map<String, Long> diffs =
+        SchemaMetrics.diffMetrics(previousMetricSnapshot, currentMetricsSnapshot);
+
+    long dataBlockRead = SchemaMetrics.getLong(diffs,
+        schemaMetrics.getBlockMetricName(BlockCategory.DATA, false, BlockMetricType.READ_COUNT));
+    long indexBlockRead = SchemaMetrics.getLong(diffs,
+        schemaMetrics.getBlockMetricName(BlockCategory.INDEX, false, BlockMetricType.READ_COUNT));
+
+    Assert.assertEquals(expectDataBlockRead, dataBlockRead);
+    Assert.assertEquals(expectedIndexBlockRead, indexBlockRead);
+  }
+}
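
A minimal sketch (editor's addition, not part of the patch) of the decision the new reseekTo() fast path makes: a forward reseek stays inside the current data block when the target key is strictly smaller than the nextIndexedKey recorded by loadDataBlockWithScanInfo(), or when the current block is known to be the last one (the NO_NEXT_INDEXED_KEY sentinel, which is compared by reference); a null nextIndexedKey means it has not been loaded yet, so the block index must be consulted. The tiny byte comparison and the example keys below are illustrative assumptions, not HBase APIs; only the sentinel semantics and the strictly-smaller rule come from the patch.

// Illustrative only; mirrors the condition in AbstractScannerV2.reseekTo() above.
public class ReseekDecisionSketch {

  // Reference-compared sentinel, analogous to HConstants.NO_NEXT_INDEXED_KEY.
  static final byte[] NO_NEXT_INDEXED_KEY = "NO_NEXT_INDEXED_KEY".getBytes();

  /**
   * Returns true when a forward reseek can stay inside the current data block:
   * the target key is strictly smaller than the first key of the next block,
   * or there is no next block at all. A null nextIndexedKey means the value
   * has not been loaded yet, so the block index must be consulted.
   */
  static boolean canStayInCurrentBlock(byte[] targetKey, byte[] nextIndexedKey) {
    if (nextIndexedKey == null) {
      return false;
    }
    if (nextIndexedKey == NO_NEXT_INDEXED_KEY) {
      return true;
    }
    return compare(targetKey, nextIndexedKey) < 0;
  }

  // Plain lexicographic byte comparison, standing in for the reader's KeyValue comparator.
  static int compare(byte[] a, byte[] b) {
    int n = Math.min(a.length, b.length);
    for (int i = 0; i < n; i++) {
      int d = (a[i] & 0xff) - (b[i] & 0xff);
      if (d != 0) {
        return d;
      }
    }
    return a.length - b.length;
  }

  public static void main(String[] args) {
    byte[] nextIndexedKey = "g".getBytes();  // first key of the next data block
    System.out.println(canStayInCurrentBlock("e".getBytes(), nextIndexedKey));       // true: no index lookup needed
    System.out.println(canStayInCurrentBlock("g".getBytes(), nextIndexedKey));       // false: consult the block index
    System.out.println(canStayInCurrentBlock("z".getBytes(), NO_NEXT_INDEXED_KEY));  // true: current block is the last one
  }
}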