Index: src/test/java/org/apache/hadoop/hbase/io/hfile/TestScannerSelectionUsingKeyRange.java
===================================================================
--- src/test/java/org/apache/hadoop/hbase/io/hfile/TestScannerSelectionUsingKeyRange.java (revision 0)
+++ src/test/java/org/apache/hadoop/hbase/io/hfile/TestScannerSelectionUsingKeyRange.java (revision 0)
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.BlockType.BlockCategory;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.StoreFile.BloomType;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics;
+import org.apache.hadoop.hbase.regionserver.metrics.SchemaMetrics.BlockMetricType;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+/**
+ * Test the optimization that does not scan files where all key ranges are excluded.
+ */
+@RunWith(Parameterized.class)
+@Category(SmallTests.class)
+public class TestScannerSelectionUsingKeyRange {
+  private static final Log LOG = LogFactory.getLog(TestScannerSelectionUsingKeyRange.class);
+  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static String TABLE = "myTable";
+  private static String FAMILY = "myCF";
+  private static byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);
+  private static final int NUM_ROWS = 8;
+  private static final int NUM_COLS_PER_ROW = 5;
+  private static final int NUM_FILES = 2;
+  private static final Map<Object, Integer> TYPE_COUNT = new HashMap<Object, Integer>(3);
+  static {
+    TYPE_COUNT.put(BloomType.ROWCOL, 2);
+    TYPE_COUNT.put(BloomType.ROW, 2);
+    TYPE_COUNT.put(BloomType.NONE, 2);
+  }
+
+  private BloomType bloomType;
+  private int expectedCount;
+
+  @Parameters
+  public static Collection<Object[]> parameters() {
+    List<Object[]> params = new ArrayList<Object[]>();
+    for (Object type : TYPE_COUNT.keySet()) {
+      params.add(new Object[] { type, TYPE_COUNT.get(type) });
+    }
+    return params;
+  }
+
+  public TestScannerSelectionUsingKeyRange(Object expectedType, Object expectedCount) {
+    this.bloomType = (BloomType) expectedType;
+    this.expectedCount = (Integer) expectedCount;
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.cleanupTestDir();
+  }
+
+  @Test
+  public void testScannerSelection() throws IOException {
+    Configuration conf = TEST_UTIL.getConfiguration();
+    conf.setInt("hbase.hstore.compactionThreshold", 10000);
+    HColumnDescriptor hcd = new HColumnDescriptor(FAMILY_BYTES).setBlockCacheEnabled(true)
+        .setBloomFilterType(bloomType);
+    HTableDescriptor htd = new HTableDescriptor(TABLE);
+    htd.addFamily(hcd);
+    HRegionInfo info = new HRegionInfo(Bytes.toBytes(TABLE));
+    HRegion region = HRegion.createHRegion(info, TEST_UTIL.getClusterTestDir(), conf, htd);
+
+    for (int iFile = 0; iFile < NUM_FILES; ++iFile) {
+      for (int iRow = 0; iRow < NUM_ROWS; ++iRow) {
+        Put put = new Put(Bytes.toBytes("row" + iRow));
+        for (int iCol = 0; iCol < NUM_COLS_PER_ROW; ++iCol) {
+          put.add(FAMILY_BYTES, Bytes.toBytes("col" + iCol),
+              Bytes.toBytes("value" + iFile + "_" + iRow + "_" + iCol));
+        }
+        region.put(put);
+      }
+      region.flushcache();
+    }
+
+    Scan scan = new Scan(Bytes.toBytes("aaa"), Bytes.toBytes("aaz"));
+    CacheConfig cacheConf = new CacheConfig(conf);
+    LruBlockCache cache = (LruBlockCache) cacheConf.getBlockCache();
+    cache.clearCache();
+    Map<String, Long> metricsBefore = SchemaMetrics.getMetricsSnapshot();
+    SchemaMetrics.validateMetricChanges(metricsBefore);
+    InternalScanner scanner = region.getScanner(scan);
+    List<KeyValue> results = new ArrayList<KeyValue>();
+    while (scanner.next(results)) {
+    }
+    scanner.close();
+    assertEquals(0, results.size());
+    Set<String> accessedFiles = cache.getCachedFileNamesForTest();
+    assertEquals(0, accessedFiles.size());
+    //assertEquals(cache.getBlockCount(), 0);
+    Map<String, Long> diffMetrics = SchemaMetrics.diffMetrics(metricsBefore,
+        SchemaMetrics.getMetricsSnapshot());
+    SchemaMetrics schemaMetrics = SchemaMetrics.getInstance(TABLE, FAMILY);
+    long dataBlockRead = SchemaMetrics.getLong(diffMetrics,
+        schemaMetrics.getBlockMetricName(BlockCategory.DATA, false, BlockMetricType.READ_COUNT));
+    assertEquals(0, dataBlockRead);
+    region.close();
+  }
+}
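Note on the test's key range: the scan interval [aaa, aaz) sorts entirely before the first stored row ("row0"), so with the key-range filter in place no store file should be selected and no data blocks read, which is exactly what the assertions above verify. As a minimal, self-contained sketch of the row-level overlap test the patch adds to StoreFile below (Bytes.compareTo stands in for the KeyValue-comparator logic; the class and method names are illustrative, not part of the patch):

import org.apache.hadoop.hbase.util.Bytes;

// Illustrative sketch only: row-level version of the store file key-range check.
public final class KeyRangeOverlapSketch {
  // True when the scan range [scanStart, scanStop) may intersect the file's [fileFirst, fileLast].
  static boolean overlaps(byte[] scanStart, byte[] scanStop, byte[] fileFirst, byte[] fileLast) {
    boolean openEnded = scanStop.length == 0; // empty stop row means "scan to the end of the table"
    boolean fileAfterScan = !openEnded && Bytes.compareTo(fileFirst, scanStop) > 0;
    boolean fileBeforeScan = Bytes.compareTo(fileLast, scanStart) < 0;
    return !(fileAfterScan || fileBeforeScan);
  }

  public static void main(String[] args) {
    byte[] first = Bytes.toBytes("row0");
    byte[] last = Bytes.toBytes("row7");
    // The test's range [aaa, aaz) sorts before "row0": no overlap, the file is skipped.
    System.out.println(overlaps(Bytes.toBytes("aaa"), Bytes.toBytes("aaz"), first, last)); // false
    // A range that reaches into the stored rows does overlap.
    System.out.println(overlaps(Bytes.toBytes("row2"), Bytes.toBytes("row5"), first, last)); // true
  }
}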
Index: src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (revision 1460990)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (working copy)
@@ -368,9 +368,8 @@
   }
 
   @Override
-  public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns,
-      long oldestUnexpiredTS) {
-    return reader.passesTimerangeFilter(scan, oldestUnexpiredTS) &&
-        reader.passesBloomFilter(scan, columns);
+  public boolean shouldUseScanner(Scan scan, SortedSet<byte[]> columns, long oldestUnexpiredTS) {
+    return reader.passesTimerangeFilter(scan, oldestUnexpiredTS)
+        && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, columns);
   }
 }
Index: src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java
===================================================================
--- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (revision 1460990)
+++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (working copy)
@@ -1610,6 +1610,28 @@
       return true;
     }
 
+    /**
+     * Checks whether the given scan rowkey range overlaps with the current store file's key range.
+     * @param scan the scan specification. Used to determine the rowkey range.
+     * @return true if there is overlap, false otherwise
+     */
+    boolean passesKeyRangeFilter(Scan scan) {
+      if (this.getFirstKey() == null || this.getLastKey() == null) {
+        // the file is empty
+        return false;
+      }
+      if (Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW)
+          && Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
+        return true;
+      }
+      KeyValue startKeyValue = KeyValue.createFirstOnRow(scan.getStartRow());
+      KeyValue stopKeyValue = KeyValue.createLastOnRow(scan.getStopRow());
+      boolean nonOverLapping = (getComparator().compare(this.getFirstKey(),
+          stopKeyValue.getKey()) > 0 && !Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW))
+          || getComparator().compare(this.getLastKey(), startKeyValue.getKey()) < 0;
+      return !nonOverLapping;
+    }
+
     public Map<byte[], byte[]> loadFileInfo() throws IOException {
       Map<byte[], byte[]> fi = reader.loadFileInfo();
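The boundary handling in passesKeyRangeFilter brackets the scan's rows with KeyValue.createFirstOnRow for the start row and KeyValue.createLastOnRow for the stop row, so the comparisons against the file's first and last keys are conservative at row granularity: a file is rejected only when it lies strictly outside the scanned rows. A small sketch of that bracketing, assuming the 0.94-era KeyValue API the patch itself uses (the class name is illustrative):

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.util.Bytes;

// Illustrative sketch only: createFirstOnRow/createLastOnRow bracket all cells of a row.
public class RowBracketSketch {
  public static void main(String[] args) {
    byte[] row = Bytes.toBytes("row3");
    KeyValue firstOnRow = KeyValue.createFirstOnRow(row); // sorts before every real cell in "row3"
    KeyValue lastOnRow = KeyValue.createLastOnRow(row);   // sorts after every real cell in "row3"
    int cmp = KeyValue.KEY_COMPARATOR.compare(firstOnRow.getKey(), lastOnRow.getKey());
    System.out.println(cmp < 0); // true: every cell of "row3" falls between the two boundary keys
  }
}

Placing the new check between passesTimerangeFilter and passesBloomFilter in shouldUseScanner also keeps a cheap pair of in-memory key comparisons ahead of the Bloom filter test, which may have to load filter blocks.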