diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index 22bffee..666b357 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -63,8 +63,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { @Override public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory, final boolean cacheDataInL1) { - boolean isMetaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA; - if (isMetaBlock || cacheDataInL1) { + boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA; + if (metaBlock || cacheDataInL1) { lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1); } else { l2Cache.cacheBlock(cacheKey, buf, inMemory, false); @@ -81,12 +81,9 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { boolean repeat, boolean updateCacheMetrics) { // TODO: is there a hole here, or just awkwardness since in the lruCache getBlock // we end up calling l2Cache.getBlock. - if (lruCache.containsBlock(cacheKey)) { - return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics); - } - Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics); - - return result; + return lruCache.containsBlock(cacheKey)? 
+ lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics): + l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java index eae713f..a278472 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java @@ -29,6 +29,9 @@ import org.apache.hadoop.hbase.client.Scan; * Scanner that returns the next KeyValue. */ @InterfaceAudience.Private +// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner +// so this should be something else altogether, a decoration on our base CellScanner. TODO. +// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0. public interface KeyValueScanner extends Shipper { /** * The byte array represents for NO_NEXT_INDEXED_KEY; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java index c220b5c..e7981b1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java @@ -93,7 +93,7 @@ public class ScanQueryMatcher { /* row is not private for tests */ /** Row the query is on */ Cell curCell; - + /** * Oldest put in any of the involved store files * Used to decide whether it is ok to delete @@ -119,7 +119,7 @@ public class ScanQueryMatcher { * first column. 
* */ private boolean hasNullColumn = true; - + private RegionCoprocessorHost regionCoprocessorHost= null; // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete @@ -140,12 +140,17 @@ public class ScanQueryMatcher { // currently influencing. This is because Puts, that this delete can // influence. may appear out of order. private final long timeToPurgeDeletes; - + private final boolean isUserScan; private final boolean isReversed; /** + * True if we are doing a 'Get' Scan. Every Get is actually a one-row Scan. + */ + private final boolean get; + + /** * Construct a QueryMatcher for a scan * @param scan * @param scanInfo The store's immutable scan info @@ -154,8 +159,8 @@ public class ScanQueryMatcher { * @param earliestPutTs Earliest put seen in any of the store files. * @param oldestUnexpiredTS the oldest timestamp we are interested in, * based on TTL - * @param regionCoprocessorHost - * @throws IOException + * @param regionCoprocessorHost + * @throws IOException */ public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet columns, ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, @@ -166,6 +171,7 @@ public class ScanQueryMatcher { } else { this.tr = timeRange; } + this.get = scan.isGetScan(); this.rowComparator = scanInfo.getComparator(); this.regionCoprocessorHost = regionCoprocessorHost; this.deletes = instantiateDeleteTracker(); @@ -234,8 +240,8 @@ public class ScanQueryMatcher { * @param now the current server time * @param dropDeletesFromRow The inclusive left bound of the range; can be EMPTY_START_ROW. * @param dropDeletesToRow The exclusive right bound of the range; can be EMPTY_END_ROW. 
- * @param regionCoprocessorHost - * @throws IOException + * @param regionCoprocessorHost + * @throws IOException */ public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet columns, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long now, @@ -280,7 +286,7 @@ public class ScanQueryMatcher { * caused by a data corruption. */ public MatchCode match(Cell cell) throws IOException { - if (filter != null && filter.filterAllRemaining()) { + if (filter != null && filter.filterAllRemaining()) { return MatchCode.DONE_SCAN; } if (curCell != null) { @@ -324,7 +330,7 @@ public class ScanQueryMatcher { // check if the cell is expired by cell TTL if (HStore.isCellTTLExpired(cell, this.oldestUnexpiredTS, this.now)) { return MatchCode.SKIP; - } + } /* * The delete logic is pretty complicated now. @@ -359,10 +365,10 @@ public class ScanQueryMatcher { } // Can't early out now, because DelFam come before any other keys } - + if ((!isUserScan) && timeToPurgeDeletes > 0 - && (EnvironmentEdgeManager.currentTime() - timestamp) + && (EnvironmentEdgeManager.currentTime() - timestamp) <= timeToPurgeDeletes) { return MatchCode.INCLUDE; } else if (retainDeletesInOutput || mvccVersion > maxReadPointToTrackVersions) { @@ -503,22 +509,26 @@ public class ScanQueryMatcher { } } + /** + * @return Returns false if we know there are no more rows to be scanned (We've reached the + * stopRow or we are scanning on row only because this Scan is for a Get, etc. + */ public boolean moreRowsMayExistAfter(Cell kv) { - if (this.isReversed) { - if (rowComparator.compareRows(kv, stopRow, 0, stopRow.length) <= 0) { - return false; - } else { - return true; - } - } - if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) && - rowComparator.compareRows(kv, stopRow, 0, stopRow.length) >= 0) { - // KV >= STOPROW - // then NO there is nothing left. 
+ // If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan in implementation) -- + // then we are looking at one row only, the one specified in the Get coordinate..so we know + // for sure that there are no more rows on this Scan + if (this.get) { return false; - } else { + } + // If no stopRow, return that may be more rows. The tests that follow depend on a non-empty + // stopRow so this little compare short-circuits out doing the following compares. + if (this.stopRow == null || this.stopRow == HConstants.EMPTY_BYTE_ARRAY) { return true; } + return this.isReversed? + rowComparator.compareRows(kv, stopRow, 0, stopRow.length) > 0: + Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) || + rowComparator.compareRows(kv, stopRow, 0, stopRow.length) < 0; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 2f0d284..b9aa50e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -649,11 +649,55 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } - /* - * See if we should actually SEEK or rather just SKIP to the next Cell. - * (see HBASE-13109) + /** + * See if we should actually SEEK or rather just SKIP to the next Cell (see HBASE-13109). + * Via a coarse heuristic, this optimize tries to squash calls to SEEK when it can because it is + * much faster doing SKIP, SKIP, SKIP... than it is to SEEK. SEEK is really expensive. Only SEEK + * when we can be near sure that we are going to the next block. + * + *

BE CAREFUL reading the below. It is a little opaque what is going on. The return from + * {@link #getNextIndexedKey()} is not a 'real' key, it is what is returned up out of + * KeyValueUtil#createLastOnRow which is: + * currentCell, 0, 0, null, 0, 0, HConstants.OLDEST_TIMESTAMP, Type.Minimum.getCode() + * It's not a Cell that really exist in a file, it's just a Cell tailored for seeking exactly past + * the current row, so the _next_ Cell we see is from the next row. THIS IS WRONG!!!! FIX!!! + * + *

Here is more on this optimization. + *

+ *

A good proxy (best effort) to determine whether INCLUDE/SKIP is better than SEEK is whether + * we'll likely end up seeking to the next block (or past the next block) to get our next column. + * Example: + *

+   * |    BLOCK 1              |     BLOCK 2                   |
+   * |  r1/c1, r1/c2, r1/c3    |    r1/c4, r1/c5, r2/c1        |
+   *                                   ^         ^
+   *                                   |         |
+   *                           Next Index Key   SEEK_NEXT_ROW (before r2/c1)
+   *
+   *
+   * |    BLOCK 1                       |     BLOCK 2                      |
+   * |  r1/c1/t5, r1/c1/t4, r1/c1/t3    |    r1/c1/t2, r1/c1/T1, r1/c2/T3  |
+   *                                            ^              ^
+   *                                            |              |
+   *                                    Next Index Key        SEEK_NEXT_COL
+   * 
+ * Now imagine we want columns c1 and c3 (see first diagram above), the 'Next Index Key' of r1/c4 + * is > r1/c3 so we should seek to get to the c1 on the next row, r2. In second case, say we only + * want one version of c1, after we have it, a SEEK_COL will be issued to get to c2. Looking at + * the 'Next Index Key', it would land us in the next block, so we should SEEK. In other scenarios + * where the SEEK will not land us in the next block, it is very likely better to issue a series + * of SKIPs. */ - private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) { + @VisibleForTesting + protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) { switch(qcode) { case INCLUDE_AND_SEEK_NEXT_COL: case SEEK_NEXT_COL: @@ -668,10 +712,15 @@ case INCLUDE_AND_SEEK_NEXT_ROW: case SEEK_NEXT_ROW: { - Cell nextIndexedKey = getNextIndexedKey(); - if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY - && matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) { - return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE; + // If it is a Get Scan, then we know that we are done with this row; there are no more + // rows beyond the current one: don't try to optimize. We are DONE. Return the *_NEXT_ROW as + // is. When the caller gets these flags on a Get Scan, it knows to shut down the Scan. + if (!this.scan.isGetScan()) { + Cell nextIndexedKey = getNextIndexedKey(); + if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY + && matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) { + return qcode == MatchCode.SEEK_NEXT_ROW ? 
MatchCode.SKIP : MatchCode.INCLUDE; + } } break; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java index 9fc068f..4720880 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java @@ -30,8 +30,7 @@ import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner; /** - * Utility scanner that wraps a sortable collection and serves - * as a KeyValueScanner. + * Utility scanner that wraps a sortable collection and serves as a KeyValueScanner. */ @InterfaceAudience.Private public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java index 3f87a00..a4e7f9b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java @@ -22,6 +22,7 @@ package org.apache.hadoop.hbase.regionserver; import java.util.ArrayList; import java.util.List; +import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.util.CollectionBackedScanner; @@ -33,9 +34,8 @@ import org.apache.hadoop.hbase.util.CollectionBackedScanner; * to be a store file scanner. */ public class KeyValueScanFixture extends CollectionBackedScanner { - public KeyValueScanFixture(CellComparator comparator, - KeyValue... incData) { - super(comparator, incData); + public KeyValueScanFixture(CellComparator comparator, Cell... 
cells) { + super(comparator, cells); } public static List scanFixture(KeyValue[] ... kvArrays) { @@ -45,4 +45,4 @@ public class KeyValueScanFixture extends CollectionBackedScanner { } return scanners; } -} +} \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java index a8c2c65..0e96682 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java @@ -46,8 +46,7 @@ public class TestKeyValueScanFixture extends TestCase { KeyValueTestUtil.create("RowB", "family", "qf1", 10, KeyValue.Type.Put, "value-10") }; - KeyValueScanner scan = new KeyValueScanFixture( - CellComparator.COMPARATOR, kvs); + KeyValueScanner scan = new KeyValueScanFixture(CellComparator.COMPARATOR, kvs); KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA")); // should seek to this: diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java index 92c85aa..1461d0a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.regionserver; import static org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture; +import static org.junit.Assert.*; import java.io.IOException; import java.util.ArrayList; @@ -27,16 +28,21 @@ import java.util.Arrays; import java.util.List; import java.util.NavigableSet; import java.util.TreeSet; +import java.util.concurrent.atomic.AtomicInteger; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CategoryBasedTimeout; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; +import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests; @@ -53,16 +59,59 @@ import org.junit.rules.TestRule; // Can't be small as it plays with EnvironmentEdgeManager @Category({RegionServerTests.class, MediumTests.class}) public class TestStoreScanner { + private static final Log LOG = LogFactory.getLog(TestStoreScanner.class); @Rule public TestName name = new TestName(); @Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()). 
withLookingForStuckThread(true).build(); private static final String CF_STR = "cf"; - final byte [] CF = Bytes.toBytes(CF_STR); + private static final byte [] CF = Bytes.toBytes(CF_STR); static Configuration CONF = HBaseConfiguration.create(); private ScanInfo scanInfo = new ScanInfo(CONF, CF, 0, Integer.MAX_VALUE, Long.MAX_VALUE, KeepDeletedCells.FALSE, 0, CellComparator.COMPARATOR); private ScanType scanType = ScanType.USER_SCAN; + private static final byte [] ZERO = new byte [] {'0'}; + private static final byte [] ZERO_POINT_ZERO = new byte [] {'0', '.', '0'}; + private static final byte [] ONE = new byte [] {'1'}; + private static final byte [] TWO = new byte [] {'2'}; + private static final byte [] TWO_POINT_TWO = new byte [] {'2', '.', '2'}; + private static final byte [] THREE = new byte [] {'3'}; + private static final byte [] FOUR = new byte [] {'4'}; + private static final byte [] FIVE = new byte [] {'5'}; + private static final byte [] VALUE = new byte [] {'v'}; + private static final int CELL_GRID_ROW2_OFFSET = 4; + private static final int CELL_GRID_ROW3_OFFSET = 11; + private static final int CELL_GRID_ROW4_OFFSET = 15; + /** + * Four rows of four columns distinguished by column qualifier (column qualifier is one of the + * four rows... ONE, TWO, etc.) except we have a weird row after TWO; it is TWO_POINT_TWO. + * This weird shape is to test optimize. 
+ */ + private static final Cell [] CELL_GRID = new Cell [] { + CellUtil.createCell(ONE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(ONE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(ONE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(ONE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), + // Offset 4 CELL_GRID_ROW2_OFFSET + CellUtil.createCell(TWO, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(TWO, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(TWO, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(TWO, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(TWO_POINT_TWO, CF, ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(TWO_POINT_TWO, CF, ZERO_POINT_ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(TWO_POINT_TWO, CF, FIVE, 1L, KeyValue.Type.Put.getCode(), VALUE), + // Offset 11! CELL_GRID_ROW3_OFFSET + CellUtil.createCell(THREE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(THREE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(THREE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(THREE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), + // Offset 15 CELL_GRID_ROW4_OFFSET + CellUtil.createCell(FOUR, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(FOUR, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(FOUR, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), + CellUtil.createCell(FOUR, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), + }; + /* * Test utility for building a NavigableSet for scanners. * @param strCols @@ -77,6 +126,161 @@ public class TestStoreScanner { return cols; } + /** + * Test optimize in StoreScanner. Test that we skip to the next 'block' when we it makes sense + * reading the block 'index'. 
+ * @throws IOException + */ + @Test + public void testOptimize() throws IOException { + List scanners = Arrays.asList( + new KeyValueScanner[] {new KeyValueScanFixture(CellComparator.COMPARATOR, CELL_GRID)}); + Scan scan = new Scan(); + // A scan that just gets the first qualifier on each row of the CELL_GRID + scan.addColumn(CF, ONE); + // Count of how often optimize is called and of how often it does something. + final AtomicInteger count = new AtomicInteger(0); + final AtomicInteger optimization = new AtomicInteger(0); + StoreScanner scanner = + new StoreScanner(scan, scanInfo, scanType, scan.getFamilyMap().get(CF), scanners) { + protected org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode + optimize(org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode qcode, Cell cell) { + count.incrementAndGet(); + org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode after = + super.optimize(qcode, cell); + LOG.info("Cell=" + cell + ", nextIndex=" + CellUtil.toString(getNextIndexedKey(), false) + + ", before=" + qcode + ", after=" + after); + if (qcode != after) { + optimization.incrementAndGet(); + } + return after; + }; + + @Override + public Cell getNextIndexedKey() { + // Fake the index so it is as though we have four blocks with a row per block except in the + // second block -- see CELL_GRID -- where we have two rows, TWO and TWO_POINT_TWO. + // Return the 2nd row and then the 3rd row, etc. as the index of the next block dependent + // on how many times we've been through as though the next 'block' begins w/ second row + // according to the index and the third block with the third, etc. + return count.get() > CELL_GRID_ROW3_OFFSET? + CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_ROW4_OFFSET]): + count.get() > CELL_GRID_ROW2_OFFSET? 
+ CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_ROW3_OFFSET]): + CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_ROW2_OFFSET]); + } + }; + try { + List results = new ArrayList(); + while (scanner.next(results)) { + continue; + } + // Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the + // TWO_POINT_TWO row does not have a column ONE). + Assert.assertEquals(4, results.size()); + for (Cell cell: results) { + assertTrue(Bytes.equals(ONE, 0, ONE.length, + cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength())); + } + Assert.assertTrue("Optimize should do some optimizations", optimization.get() > 0); + } finally { + scanner.close(); + } + } + + /** + * Ensure the optimize Scan method in StoreScanner does not get in the way of a Get doing minimum + * work... seeking to start of block and then SKIPPING until we find the wanted Cell. + * This 'simple' scenario mimics case of all Cells fitting inside a single HFileBlock. + * See HBASE-15392. This test is a little cryptic. Takes a bit of staring to figure what it is up to. + * @throws IOException + */ + @Test + public void testOptimizeAndGet() throws IOException { + List scanners = Arrays.asList( + new KeyValueScanner[] {new KeyValueScanFixture(CellComparator.COMPARATOR, CELL_GRID)}); + // First test a Get of two columns in the row TWO. Every Get is a Scan. Get columns named + // TWO and THREE. + Get get = new Get(TWO); + get.addColumn(CF, TWO); + get.addColumn(CF, THREE); + Scan scan = new Scan(get); + // Count of how often optimize is called. 
+ final AtomicInteger count = new AtomicInteger(0); + StoreScanner scanner = + new StoreScanner(scan, scanInfo, scanType, scan.getFamilyMap().get(CF), scanners) { + protected org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode + optimize(org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode qcode, Cell cell) { + count.incrementAndGet(); + return super.optimize(qcode, cell); + }; + }; + try { + List results = new ArrayList(); + // For a Get there should be no more next's after the first call. + Assert.assertEquals(false, scanner.next(results)); + // Should be two results only. + Assert.assertEquals(2, results.size()); + // And we should have gone through optimize three times only. + Assert.assertEquals("First qcode is SEEK_NEXT_COL and second INCLUDE_AND_SEEK_NEXT_ROW", + 3, count.get()); + } finally { + scanner.close(); + } + } + + /** + * Ensure that optimize does not cause the Get to do more seeking than required. Optimize + * (see HBASE-15392) was causing us to seek all Cells in a block when a Get Scan if the next block + * index/start key was a different row to the current one. A bug. We'd call next too often + * because we had to exhaust all Cells in the current row making us load the next block just to + // discard what we read there. This test is a little cryptic. Takes a bit of staring to figure + * what it is up to. + * @throws IOException + */ + @Test + public void testOptimizeAndGetWithFakedNextBlockIndexStart() throws IOException { + List scanners = Arrays.asList( + new KeyValueScanner[] {new KeyValueScanFixture(CellComparator.COMPARATOR, CELL_GRID)}); + // First test a Get of the second column in the row THREE. Every Get is a Scan. Second column + // has a qualifier of TWO. + Get get = new Get(THREE); + get.addColumn(CF, TWO); + Scan scan = new Scan(get); + // Count of how often optimize is called. 
+ final AtomicInteger count = new AtomicInteger(0); + StoreScanner scanner = + new StoreScanner(scan, scanInfo, scanType, scan.getFamilyMap().get(CF), scanners) { + protected org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode + optimize(org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode qcode, Cell cell) { + count.incrementAndGet(); + return super.optimize(qcode, cell); + }; + + @Override + public Cell getNextIndexedKey() { + // Fake out StoreScanner that the next 'block' is the first Cell of the 'next' row. This + // brings on the bug seen in HBASE-15392 where instead of returning when we had all we + // needed for our Get, instead, we'd keep skipping all Cells on the current row till we + // ran into the next row. CELL_GRID[CELL_GRID_ROW4_OFFSET] is the first Cell of the new row + // FOUR (our Get is against row THREE). + return CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_ROW4_OFFSET]); + } + }; + try { + List results = new ArrayList(); + // For a Get there should be no more next's after the first call. + Assert.assertEquals(false, scanner.next(results)); + // Should be one result only. + Assert.assertEquals(1, results.size()); + // And we should have gone through optimize twice only. + Assert.assertEquals("First qcode is SEEK_NEXT_COL and second INCLUDE_AND_SEEK_NEXT_ROW", + 2, count.get()); + } finally { + scanner.close(); + } + } + @Test public void testScanTimeRange() throws IOException { String r1 = "R1";