Index: hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java
===================================================================
--- hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java	(revision 1471249)
+++ hbase-common/src/main/java/org/apache/hadoop/hbase/KeyValue.java	(working copy)
@@ -2229,7 +2229,21 @@
   }
 
   /**
-   * Similar to {@link #createLastOnRow(byte[], int, int, byte[], int, int,
+   * Similar to {@link #createLastOnRow(byte[], int, int, byte[], int, int, byte[], int, int)}
+   * but creates the last key on the row of this KV
+   * (the value part of the returned KV is always empty). Used in creating
+   * "fake keys" to skip the row we already know is not in the file.
+   * @return the last key on the row of this key-value pair
+   */
+  public KeyValue createLastOnRow() {
+    return new KeyValue(
+        bytes, getRowOffset(), getRowLength(),
+        null, 0, 0,
+        null, 0, 0,
+        HConstants.OLDEST_TIMESTAMP, Type.Minimum, null, 0, 0);
+  }
+
+  /**
+   * Similar to {@link #createLastOnRow(byte[], int, int, byte[], int, int,
    * byte[], int, int)} but creates the last key on the row/column of this KV
    * (the value part of the returned KV is always empty). Used in creating
    * "fake keys" for the multi-column Bloom filter optimization to skip the
@@ -2607,11 +2621,11 @@
     // for specifying the last key/value in a given row, because there is no
     // "lexicographically last column" (it would be infinitely long). The
     // "maximum" key type does not need this behavior.
-    if (lcolumnlength == 0 && ltype == Type.Minimum.getCode()) {
+    if (lcolumnlength == 0 && rcolumnlength != 0 && ltype == Type.Minimum.getCode()) {
       // left is "bigger", i.e. it appears later in the sorted order
       return 1;
     }
-    if (rcolumnlength == 0 && rtype == Type.Minimum.getCode()) {
+    if (rcolumnlength == 0 && lcolumnlength != 0 && rtype == Type.Minimum.getCode()) {
       return -1;
     }
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java	(revision 1471249)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java	(working copy)
@@ -281,6 +281,16 @@
     int qualLength = keyLength + KeyValue.ROW_OFFSET -
       (offset - initialOffset) - KeyValue.TIMESTAMP_TYPE_SIZE;
 
+    byte type = kv.getType();
+    // Bloom filter optimization. See StoreFileScanner.requestSeek(...).
+    if (type == KeyValue.Type.Minimum.getCode()) {
+      if (familyLength == 0) {
+        return MatchCode.SKIP;
+      } else {
+        return columns.getNextRowOrNextColumn(bytes, offset, qualLength);
+      }
+    }
+
     long timestamp = kv.getTimestamp();
     // check for early out based on timestamp alone
     if (columns.isDone(timestamp)) {
@@ -300,7 +310,6 @@
      * 7. Delete marker need to be version counted together with puts
      *    they affect
      */
-    byte type = kv.getType();
     if (kv.isDelete()) {
       if (!keepDeletedCells) {
         // first ignore delete markers if the scanner can do so, and the
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java	(revision 1471249)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java	(working copy)
@@ -1268,22 +1268,23 @@
     }
 
     byte[] key;
+    int keyOffset, keyLen;
     switch (bloomFilterType) {
       case ROW:
         if (col != null) {
           throw new RuntimeException("Row-only Bloom filter called with " +
               "column specified");
         }
-        if (rowOffset != 0 || rowLen != row.length) {
-          throw new AssertionError("For row-only Bloom filters the row "
-              + "must occupy the whole array");
-        }
+        keyOffset = rowOffset;
+        keyLen = rowLen;
         key = row;
         break;
 
       case ROWCOL:
         key = bloomFilter.createBloomKey(row, rowOffset, rowLen, col,
             colOffset, colLen);
+        keyOffset = 0;
+        keyLen = key.length;
         break;
 
       default:
@@ -1313,7 +1314,8 @@
     // from the file info. For row-column Bloom filters this is not yet
     // a sufficient condition to return false.
     boolean keyIsAfterLast = lastBloomKey != null
-        && bloomFilter.getComparator().compare(key, lastBloomKey) > 0;
+        && bloomFilter.getComparator().compare(key, keyOffset, keyLen,
+            lastBloomKey, 0, lastBloomKey.length) > 0;
 
     if (bloomFilterType == BloomType.ROWCOL) {
       // Since a Row Delete is essentially a DeleteFamily applied to all
@@ -1335,7 +1337,7 @@
       }
     } else {
       exists = !keyIsAfterLast
-          && bloomFilter.contains(key, 0, key.length, bloom);
+          && bloomFilter.contains(key, keyOffset, keyLen, bloom);
     }
 
     return exists;
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java	(revision 1471249)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java	(working copy)
@@ -280,23 +280,32 @@
   @Override
   public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom)
       throws IOException {
+    /*
     if (kv.getFamilyLength() == 0) {
       useBloom = false;
     }
-
+    */
     boolean haveToSeek = true;
+    boolean skipRow = false;
     if (useBloom) {
-      // check ROWCOL Bloom filter first.
-      if (reader.getBloomFilterType() == BloomType.ROWCOL) {
-        haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
+      // check ROW or ROWCOL Bloom filter first.
+      if (reader.getBloomFilterType() == BloomType.ROW) {
+        haveToSeek = reader.passesGeneralBloomFilter(kv.getRowArray(),
+            kv.getRowOffset(), kv.getRowLength(), null, 0, 0);
+        skipRow = !haveToSeek;
+      }
+      if (haveToSeek) {
+        if (reader.getBloomFilterType() == BloomType.ROWCOL) {
+          haveToSeek = reader.passesGeneralBloomFilter(kv.getBuffer(),
             kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(),
             kv.getQualifierOffset(), kv.getQualifierLength());
-      } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
-          kv.isDeleteFamily()) {
-        // if there is no such delete family kv in the store file,
-        // then no need to seek.
-        haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
-            kv.getRowOffset(), kv.getRowLength());
+        } else if (this.matcher != null && !matcher.hasNullColumnInQuery() &&
+            kv.isDeleteFamily()) {
+          // if there is no such delete family kv in the store file,
+          // then no need to seek.
+          haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getBuffer(),
+              kv.getRowOffset(), kv.getRowLength());
+        }
       }
     }
@@ -334,7 +343,7 @@
     // key/value and the store scanner will progress to the next column. This
     // is obviously not a "real real" seek, but unlike the fake KV earlier in
     // this method, we want this to be propagated to ScanQueryMatcher.
-    cur = kv.createLastOnRowCol();
+    cur = skipRow ? kv.createLastOnRow() : kv.createLastOnRowCol();
     realSeekDone = true;
     return true;
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java	(revision 1471249)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java	(working copy)
@@ -61,8 +61,6 @@
   // Doesnt need to be volatile because it's always accessed via synchronized methods
   protected boolean closing = false;
   protected final boolean isGet;
-  protected final boolean explicitColumnQuery;
-  protected final boolean useRowColBloom;
   /**
    * A flag that enables StoreFileScanner parallel-seeking
    */
@@ -98,17 +96,11 @@
     this.cacheBlocks = cacheBlocks;
     isGet = scan.isGetScan();
     int numCol = columns == null ? 0 : columns.size();
-    explicitColumnQuery = numCol > 0;
     this.scan = scan;
     this.columns = columns;
     oldestUnexpiredTS = EnvironmentEdgeManager.currentTimeMillis() - ttl;
     this.minVersions = minVersions;
-    // We look up row-column Bloom filters for multi-column queries as part of
-    // the seek operation. However, we also look the row-column Bloom filter
-    // for multi-row (non-"get") scans because this is not done in
-    // StoreFile.passesBloomFilter(Scan, SortedSet).
-    useRowColBloom = numCol > 1 || (!isGet && numCol == 1);
     // The parallel-seeking is on :
     // 1) the config value is *true*
     // 2) store has more than one store file
@@ -150,9 +142,9 @@
       // key does not exist, then to the start of the next matching Row).
       // Always check bloom filter to optimize the top row seek for delete
       // family marker.
-      if (explicitColumnQuery && lazySeekEnabledGlobally) {
+      if (lazySeekEnabledGlobally) {
         for (KeyValueScanner scanner : scanners) {
-          scanner.requestSeek(matcher.getStartKey(), false, true);
+          scanner.requestSeek(matcher.getStartKey(), false, false);
         }
       } else {
         if (!isParallelSeekEnabled) {
@@ -590,8 +582,8 @@
     //If called from RegionScanner.reseek(...) make sure the scanner
     //stack is reset if needed.
     checkReseek();
-    if (explicitColumnQuery && lazySeekEnabledGlobally) {
-      return heap.requestSeek(kv, true, useRowColBloom);
+    if (lazySeekEnabledGlobally) {
+      return heap.requestSeek(kv, true, true);
     }
     return heap.reseek(kv);
   }
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithRowBloom.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithRowBloom.java	(revision 0)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanWithRowBloom.java	(working copy)
@@ -0,0 +1,85 @@
+package org.apache.hadoop.hbase.regionserver;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.BlockCache;
+import org.apache.hadoop.hbase.io.hfile.CacheConfig;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Test;
+
+public class TestScanWithRowBloom {
+  private HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private final String DIR = TEST_UTIL.getDataTestDir("TestBlocksRead").toString();
+  private static final String FAM_STRING = "fam";
+  private static final byte[] FAM = Bytes.toBytes(FAM_STRING);
+
+  private static BlockCache blockCache;
+
+  @Test
+  public void testScanWithRowBloom() throws Exception {
+    HRegion r = initHRegion(Bytes.toBytes("TestScanWithRowBloom"), "testScanWithRowBloom", getConf(), FAM_STRING);
+    putData(r, "row", "col1", 0);
+    r.flushcache();
+    putData(r, "row1", "col1", 0);
+    r.flushcache();
+    putData(r, "row2", "col1", 0);
+    r.flushcache();
+    long start = getBlkAccessCount();
+    RegionScanner rs = r.getScanner(new Scan());
+    rs.reseek(Bytes.toBytes("row2"));
+    rs.next(new ArrayList());
+    assertEquals(1, getBlkAccessCount() - start);
+  }
+
+  private static long getBlkAccessCount() {
+    return HFile.dataBlockReadCnt.get();
+  }
+
+  private void putData(HRegion r, String row, String col, long version) throws IOException {
+    byte columnBytes[] = Bytes.toBytes(col);
+    Put put = new Put(Bytes.toBytes(row));
+    put.setDurability(Durability.SKIP_WAL);
+    put.add(FAM, columnBytes, version, Bytes.toBytes("Value:" + row + "#" + col + "#" + version));
+    r.put(put);
+  }
+
+  private Configuration getConf() {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    // disable compactions in this test
+    conf.setInt("hbase.hstore.compactionThreshold", 10000);
+    return conf;
+  }
+
+  // lifted from TestBlocksRead
+  private HRegion initHRegion(byte[] tableName, String callingMethod,
+      Configuration conf, String family) throws IOException {
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    HColumnDescriptor familyDesc;
+    familyDesc = new HColumnDescriptor(family)
+        .setBlocksize(1)
+        .setBloomFilterType(BloomType.ROW);
+    htd.addFamily(familyDesc);
+
+    HRegionInfo info = new HRegionInfo(htd.getName(), null, null, false);
+    Path path = new Path(DIR + callingMethod);
+    HRegion r = HRegion.createHRegion(info, path, conf, htd);
+    blockCache = new CacheConfig(conf).getBlockCache();
+    return r;
+  }
+}
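Note on the intended interaction (an illustrative sketch, not part of the patch): on a ROW Bloom miss, StoreFileScanner.requestSeek() now sets cur to kv.createLastOnRow(), a Type.Minimum key with an empty column built from the row bytes. The pre-existing "last on row" rule in the KeyValue comparator already orders such a key after every real cell of its row; the patch only adds the rcolumnlength/lcolumnlength guards so that two of these fake keys compare as equal to each other. ScanQueryMatcher then maps the Type.Minimum marker to SKIP when the family is empty, or to getNextRowOrNextColumn in the ROWCOL case, instead of treating it as data. The snippet below checks that ordering contract; it assumes the patched hbase-common on the classpath and assertions enabled with -ea, and its class and variable names are invented for the example.

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

public class FakeRowKeyOrderingSketch {
  public static void main(String[] args) {
    byte[] fam = Bytes.toBytes("fam");
    byte[] qual = Bytes.toBytes("col1");

    KeyValue realCell = new KeyValue(Bytes.toBytes("row1"), fam, qual, 1L, Bytes.toBytes("v"));
    KeyValue nextRowCell = new KeyValue(Bytes.toBytes("row2"), fam, qual, 1L, Bytes.toBytes("v"));

    // Fake key produced on a ROW Bloom miss: empty family/qualifier, Type.Minimum,
    // OLDEST_TIMESTAMP, built from realCell's row bytes.
    KeyValue fakeLastOnRow = realCell.createLastOnRow();

    // The fake key sorts after every real cell of its own row but before the first
    // cell of the next row, so the scanner heap moves past "row1" without reading
    // that row's data blocks.
    assert KeyValue.COMPARATOR.compare(realCell, fakeLastOnRow) < 0;
    assert KeyValue.COMPARATOR.compare(fakeLastOnRow, nextRowCell) < 0;
  }
}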