Index: src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java (revision 1230825) +++ src/test/java/org/apache/hadoop/hbase/regionserver/TestBlocksRead.java (working copy) @@ -258,8 +258,8 @@ assertEquals(1, kvs.length); verifyData(kvs[0], "row", "col1", 3); - // Baseline expected blocks read: 4 - kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 4); + // Expected block reads: 3 + kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2"), 3); assertEquals(2, kvs.length); verifyData(kvs[0], "row", "col1", 3); verifyData(kvs[1], "row", "col2", 4); @@ -268,8 +268,8 @@ putData(FAMILY, "row", "col3", 5); region.flushcache(); - // Baseline expected blocks read: 5 - kvs = getData(FAMILY, "row", "col3", 5); + // Baseline expected blocks read: 3 + kvs = getData(FAMILY, "row", "col3", 3); assertEquals(1, kvs.length); verifyData(kvs[0], "row", "col3", 5); @@ -314,8 +314,8 @@ putData(FAMILY, "row", "col3", 13); region.flushcache(); - // Baseline expected blocks read: 13 - kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 13); + // Baseline expected blocks read: 9 + kvs = getData(FAMILY, "row", Arrays.asList("col1", "col2", "col3"), 9); assertEquals(3, kvs.length); verifyData(kvs[0], "row", "col1", 11); verifyData(kvs[1], "row", "col2", 12); Index: src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java (working copy) @@ -26,14 +26,13 @@ import java.util.SortedSet; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.regionserver.AbstractKeyValueScanner; -import org.apache.hadoop.hbase.regionserver.KeyValueScanner; +import 
org.apache.hadoop.hbase.regionserver.NonLazyKeyValueScanner; /** * Utility scanner that wraps a sortable collection and serves * as a KeyValueScanner. */ -public class CollectionBackedScanner extends AbstractKeyValueScanner { +public class CollectionBackedScanner extends NonLazyKeyValueScanner { final private Iterable data; final KeyValue.KVComparator comparator; private Iterator iter; Index: src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java (working copy) @@ -57,16 +57,6 @@ public boolean reseek(KeyValue key) throws IOException; /** - * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only - * does a seek operation after checking that it is really necessary for the - * row/column combination specified by the kv parameter. This function was - * added to avoid unnecessary disk seeks on multi-column get queries using - * Bloom filter checking. Should only be used for queries where the set of - * columns is specified exactly. - */ - public boolean seekExactly(KeyValue kv, boolean forward) throws IOException; - - /** * Get the sequence id associated with this KeyValueScanner. This is required * for comparing multiple files to find out which one has the latest data. * The default implementation for this would be to return 0. A file having @@ -78,4 +68,37 @@ * Close the KeyValue scanner. */ public void close(); + + // "Lazy scanner" optimizations + + /** + * Similar to {@link #seek} (or {@link #reseek} if forward is true) but only + * does a seek operation after checking that it is really necessary for the + * row/column combination specified by the kv parameter. 
This function was + * added to avoid unnecessary disk seeks by checking row-column Bloom filters + * before a seek on multi-column get/scan queries, and to optimize by looking + * up more recent files first. + * @param forward do a forward-only "reseek" instead of a random-access seek + * @param useBloom whether to enable multi-column Bloom filter optimization + */ + public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom) + throws IOException; + + /** + * We optimize our store scanners by checking the most recent store file + * first, so we sometimes pretend we have done a seek but delay it until the + * store scanner bubbles up to the top of the key-value heap. This method is + * then used to ensure the top store file scanner has done a seek operation. + */ + public boolean realSeekDone(); + + /** + * Does the real seek operation in case it was skipped by + * {@link #requestSeek(KeyValue, boolean, boolean)}. Note that this function should + * never be called on scanners that always do real seek operations (i.e. most + * of the scanners). The easiest way to achieve this is to call + * {@link #realSeekDone()} first. 
+ */ + public void enforceSeek() throws IOException; + } Index: src/main/java/org/apache/hadoop/hbase/regionserver/ColumnCount.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/ColumnCount.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/ColumnCount.java (working copy) @@ -106,16 +106,4 @@ this.count = count; } - - /** - * Check to see if needed to fetch more versions - * @param max - * @return true if more versions are needed, false otherwise - */ - public boolean needMore(int max) { - if(this.count < max) { - return true; - } - return false; - } } Index: src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java (working copy) @@ -59,12 +59,6 @@ /** Row the query is on */ protected byte [] row; - - /** - * True if we are only interested in the given exact set of columns. In that - * case we can use Bloom filters to avoid unnecessary disk seeks. - */ - private boolean exactColumnQuery; /** * Constructs a ScanQueryMatcher for a Scan. @@ -96,7 +90,6 @@ // between rows, not between storefiles. this.columns = new ExplicitColumnTracker(columns, minVersions, maxVersions, ttl); - exactColumnQuery = true; } } @@ -326,10 +319,6 @@ null, 0, 0); } - public boolean isExactColumnQuery() { - return exactColumnQuery; - } - /** * {@link #match} return codes. These instruct the scanner moving through * memstores and StoreFiles what to do with the current KeyValue. 
Index: src/main/java/org/apache/hadoop/hbase/regionserver/AbstractKeyValueScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/AbstractKeyValueScanner.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/AbstractKeyValueScanner.java (working copy) @@ -1,33 +0,0 @@ -/* - * Copyright 2011 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.regionserver; - -import java.io.IOException; - -import org.apache.hadoop.hbase.KeyValue; - -public abstract class AbstractKeyValueScanner implements KeyValueScanner { - - @Override - public boolean seekExactly(KeyValue kv, boolean forward) throws IOException { - return forward ? reseek(kv) : seek(kv); - } - -} Index: src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java (working copy) @@ -40,34 +40,23 @@ * also implements InternalScanner. 
WARNING: As is, if you try to use this * as an InternalScanner at the Store level, you will get runtime exceptions. */ -public class KeyValueHeap implements KeyValueScanner, InternalScanner { +public class KeyValueHeap extends NonLazyKeyValueScanner + implements KeyValueScanner, InternalScanner { private PriorityQueue heap = null; - private KeyValueScanner current = null; - private KVScannerComparator comparator; /** - * A helper enum that knows how to call the correct seek function within a - * {@link KeyValueScanner}. + * The current sub-scanner, i.e. the one that contains the next key/value + * to return to the client. This scanner is NOT included in {@link #heap} + * (but we frequently add it back to the heap and pull the new winner out). + * We maintain an invariant that the current sub-scanner has already done + * a real seek, and that current.peek() is always a real key/value (or null) + * except for the fake last-key-on-row-column supplied by the multi-column + * Bloom filter optimization, which is OK to propagate to StoreScanner. In + * order to ensure that, always use {@link #pollRealKV()} to update current. */ - public enum SeekType { - NORMAL { - @Override - public boolean seek(KeyValueScanner scanner, KeyValue kv, - boolean forward) throws IOException { - return forward ? scanner.reseek(kv) : scanner.seek(kv); - } - }, - EXACT { - @Override - public boolean seek(KeyValueScanner scanner, KeyValue kv, - boolean forward) throws IOException { - return scanner.seekExactly(kv, forward); - } - }; + private KeyValueScanner current = null; - public abstract boolean seek(KeyValueScanner scanner, KeyValue kv, - boolean forward) throws IOException; - } + private KVScannerComparator comparator; /** * Constructor. 
This KeyValueHeap will handle closing of passed in @@ -76,7 +65,7 @@ * @param comparator */ public KeyValueHeap(List scanners, - KVComparator comparator) { + KVComparator comparator) throws IOException { this.comparator = new KVScannerComparator(comparator); if (!scanners.isEmpty()) { this.heap = new PriorityQueue(scanners.size(), @@ -88,7 +77,7 @@ scanner.close(); } } - this.current = heap.poll(); + this.current = pollRealKV(); } } @@ -107,13 +96,13 @@ KeyValue kvNext = this.current.peek(); if (kvNext == null) { this.current.close(); - this.current = this.heap.poll(); + this.current = pollRealKV(); } else { KeyValueScanner topScanner = this.heap.peek(); if (topScanner == null || this.comparator.compare(kvNext, topScanner.peek()) >= 0) { this.heap.add(this.current); - this.current = this.heap.poll(); + this.current = pollRealKV(); } } return kvReturn; @@ -149,7 +138,7 @@ } else { this.heap.add(this.current); } - this.current = this.heap.poll(); + this.current = pollRealKV(); return (this.current != null); } @@ -230,13 +219,20 @@ *

* As individual scanners may run past their ends, those scanners are * automatically closed and removed from the heap. + *

+ * This function (and {@link #reseek(KeyValue)}) does not do multi-column + * Bloom filter and lazy-seek optimizations. To enable those, call + * {@link #requestSeek(KeyValue, boolean, boolean)}. * @param seekKey KeyValue to seek at or after * @return true if KeyValues exist at or after specified key, false if not * @throws IOException */ @Override public boolean seek(KeyValue seekKey) throws IOException { - return generalizedSeek(seekKey, SeekType.NORMAL, false); + return generalizedSeek(false, // This is not a lazy seek + seekKey, + false, // forward (false: this is not a reseek) + false); // Not using Bloom filters } /** @@ -245,20 +241,36 @@ */ @Override public boolean reseek(KeyValue seekKey) throws IOException { - return generalizedSeek(seekKey, SeekType.NORMAL, true); + return generalizedSeek(false, // This is not a lazy seek + seekKey, + true, // forward (true because this is reseek) + false); // Not using Bloom filters } /** * {@inheritDoc} */ @Override - public boolean seekExactly(KeyValue seekKey, boolean forward) - throws IOException { - return generalizedSeek(seekKey, SeekType.EXACT, forward); + public boolean requestSeek(KeyValue key, boolean forward, + boolean useBloom) throws IOException { + return generalizedSeek(true, key, forward, useBloom); } - private boolean generalizedSeek(KeyValue seekKey, SeekType seekType, - boolean forward) throws IOException { + /** + * @param isLazy whether we are trying to seek to exactly the given row/col. + * Enables Bloom filter and most-recent-file-first optimizations for + * multi-column get/scan queries. 
+ * @param seekKey key to seek to + * @param forward whether to seek forward (also known as reseek) + * @param useBloom whether to optimize seeks using Bloom filters + */ + private boolean generalizedSeek(boolean isLazy, KeyValue seekKey, + boolean forward, boolean useBloom) throws IOException { + if (!isLazy && useBloom) { + throw new IllegalArgumentException("Multi-column Bloom filter " + + "optimization requires a lazy seek"); + } + if (current == null) { return false; } @@ -269,12 +281,26 @@ while ((scanner = heap.poll()) != null) { KeyValue topKey = scanner.peek(); if (comparator.getComparator().compare(seekKey, topKey) <= 0) { - // Top KeyValue is at-or-after Seek KeyValue - current = scanner; - return true; + // Top KeyValue is at-or-after Seek KeyValue. We only know that all + // scanners are at or after seekKey (because fake keys of + // scanners where a lazy-seek operation has been done are not greater + // than their real next keys) but we still need to enforce our + // invariant that the top scanner has done a real seek. This way + // StoreScanner and RegionScanner do not have to worry about fake keys. + heap.add(scanner); + current = pollRealKV(); + return current != null; } - - if (!seekType.seek(scanner, seekKey, forward)) { + + boolean seekResult; + if (isLazy) { + seekResult = scanner.requestSeek(seekKey, forward, useBloom); + } else { + seekResult = NonLazyKeyValueScanner.doRealSeek( + scanner, seekKey, forward); + } + + if (!seekResult) { scanner.close(); } else { heap.add(scanner); @@ -286,6 +312,62 @@ } /** + * Fetches the top sub-scanner from the priority queue, ensuring that a real + * seek has been done on it. Works by fetching the top sub-scanner, and if it + * has not done a real seek, making it do so (which will modify its top KV), + * putting it back, and repeating this until success. 
Relies on the fact that + * on a lazy seek we set the current key of a StoreFileScanner to a KV that + * is not greater than the real next KV to be read from that file, so the + * scanner that bubbles up to the top of the heap will have global next KV in + * this scanner heap if (1) it has done a real seek and (2) its KV is the top + * among all top KVs (some of which are fake) in the scanner heap. + */ + private KeyValueScanner pollRealKV() throws IOException { + KeyValueScanner kvScanner = heap.poll(); + if (kvScanner == null) { + return null; + } + + while (kvScanner != null && !kvScanner.realSeekDone()) { + if (kvScanner.peek() != null) { + kvScanner.enforceSeek(); + KeyValue curKV = kvScanner.peek(); + if (curKV != null) { + KeyValueScanner nextEarliestScanner = heap.peek(); + if (nextEarliestScanner == null) { + // The heap is empty. Return the only possible scanner. + return kvScanner; + } + + // Compare the current scanner to the next scanner. We try to avoid + // putting the current one back into the heap if possible. + KeyValue nextKV = nextEarliestScanner.peek(); + if (nextKV == null || comparator.compare(curKV, nextKV) <= 0) { + // We already have the scanner with the earliest KV, so return it. + return kvScanner; + } + + // Otherwise, put the scanner back into the heap and let it compete + // against all other scanners (both those that have done a "real + // seek" and a "lazy seek"). + heap.add(kvScanner); + } else { + // Close the scanner because we did a real seek and found out there + // are no more KVs. + kvScanner.close(); + } + } else { + // Close the scanner because it has already run out of KVs even before + // we had to do a real seek on it. 
+ kvScanner.close(); + } + kvScanner = heap.poll(); + } + + return kvScanner; + } + + /** * @return the current Heap */ public PriorityQueue getHeap() { Index: src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java (working copy) @@ -44,6 +44,11 @@ private final StoreFile.Reader reader; private final HFileScanner hfs; private KeyValue cur = null; + + private boolean realSeekDone; + private boolean delayedReseek; + private KeyValue delayedSeekKV; + private boolean enforceMVCC = false; /** @@ -108,12 +113,16 @@ public boolean seek(KeyValue key) throws IOException { try { - if(!seekAtOrAfter(hfs, key)) { - close(); - return false; + try { + if(!seekAtOrAfter(hfs, key)) { + close(); + return false; + } + cur = hfs.getKeyValue(); + return skipKVsNewerThanReadpoint(); + } finally { + realSeekDone = true; } - cur = hfs.getKeyValue(); - return skipKVsNewerThanReadpoint(); } catch(IOException ioe) { throw new IOException("Could not seek " + this, ioe); } @@ -121,12 +130,16 @@ public boolean reseek(KeyValue key) throws IOException { try { - if (!reseekAtOrAfter(hfs, key)) { - close(); - return false; + try { + if (!reseekAtOrAfter(hfs, key)) { + close(); + return false; + } + cur = hfs.getKeyValue(); + return skipKVsNewerThanReadpoint(); + } finally { + realSeekDone = true; } - cur = hfs.getKeyValue(); - return skipKVsNewerThanReadpoint(); } catch (IOException ioe) { throw new IOException("Could not seek " + this, ioe); } @@ -211,31 +224,93 @@ return reader.getSequenceID(); } + /** + * Pretend we have done a seek but don't do it yet, if possible. The hope is + * that we find requested columns in more recent files and won't have to seek + * in older files. 
Creates a fake key/value with the given row/column and the + * highest (most recent) possible timestamp we might get from this file. When + * users of such "lazy scanner" need to know the next KV precisely (e.g. when + * this scanner is at the top of the heap), they run {@link #enforceSeek()}. + *

+ * Note that this function does guarantee that the current KV of this scanner + * will be advanced to at least the given KV. Because of this, it does have + * to do a real seek in cases when the seek timestamp is older than the + * highest timestamp of the file, e.g. when we are trying to seek to the next + * row/column and use OLDEST_TIMESTAMP in the seek key. + */ @Override - public boolean seekExactly(KeyValue kv, boolean forward) + public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom) throws IOException { if (reader.getBloomFilterType() != StoreFile.BloomType.ROWCOL || - kv.getRowLength() == 0 || kv.getQualifierLength() == 0) { - return forward ? reseek(kv) : seek(kv); + kv.getFamilyLength() == 0) { + useBloom = false; } - boolean isInBloom = reader.passesBloomFilter(kv.getBuffer(), - kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(), - kv.getQualifierOffset(), kv.getQualifierLength()); - if (isInBloom) { - // This row/column might be in this store file. Do a normal seek. - return forward ? reseek(kv) : seek(kv); + boolean haveToSeek = true; + if (useBloom) { + haveToSeek = reader.passesBloomFilter(kv.getBuffer(), + kv.getRowOffset(), kv.getRowLength(), kv.getBuffer(), + kv.getQualifierOffset(), kv.getQualifierLength()); } + delayedReseek = forward; + delayedSeekKV = kv; + + if (haveToSeek) { + // This row/column might be in this store file (or we did not use the + // Bloom filter), so we still need to seek. + realSeekDone = false; + long maxTimestampInFile = reader.getMaxTimestamp(); + long seekTimestamp = kv.getTimestamp(); + if (seekTimestamp > maxTimestampInFile) { + // Create a fake key that is not greater than the real next key. + // (Lower timestamps correspond to higher KVs.) + // To understand this better, consider that we are asked to seek to + // a higher timestamp than the max timestamp in this file. 
We know that + // the next point when we have to consider this file again is when we + // pass the max timestamp of this file (with the same row/column). + cur = kv.createFirstOnRowColTS(maxTimestampInFile); + } else { + // This will be the case e.g. when we need to seek to the next + // row/column, and we don't know exactly what they are, so we set the + // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this + // row/column. + enforceSeek(); + } + return cur != null; + } + + // Multi-column Bloom filter optimization. // Create a fake key/value, so that this scanner only bubbles up to the top // of the KeyValueHeap in StoreScanner after we scanned this row/column in // all other store files. The query matcher will then just skip this fake - // key/value and the store scanner will progress to the next column. + // key/value and the store scanner will progress to the next column. This + // is obviously not a "real real" seek, but unlike the fake KV earlier in + // this method, we want this to be propagated to ScanQueryMatcher. cur = kv.createLastOnRowCol(); + + realSeekDone = true; return true; } Reader getReaderForTesting() { return reader; } + + @Override + public boolean realSeekDone() { + return realSeekDone; + } + + @Override + public void enforceSeek() throws IOException { + if (realSeekDone) + return; + + if (delayedReseek) { + reseek(delayedSeekKV); + } else { + seek(delayedSeekKV); + } + } } Index: src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (working copy) @@ -37,7 +37,8 @@ * Scanner scans both the memstore and the HStore. Coalesce KeyValue stream * into List for a single row. 
*/ -class StoreScanner implements KeyValueScanner, InternalScanner, ChangedReadersObserver { +class StoreScanner extends NonLazyKeyValueScanner + implements KeyValueScanner, InternalScanner, ChangedReadersObserver { static final Log LOG = LogFactory.getLog(StoreScanner.class); private Store store; private ScanQueryMatcher matcher; @@ -48,10 +49,35 @@ // Doesnt need to be volatile because it's always accessed via synchronized methods private boolean closing = false; private final boolean isGet; + private final boolean explicitColumnQuery; + private final boolean useRowColBloom; + /** We don't ever expect to change this, the constant is just for clarity. */ + static final boolean LAZY_SEEK_ENABLED_BY_DEFAULT = true; + + /** Used during unit testing to ensure that lazy seek does save seek ops */ + private static boolean lazySeekEnabledGlobally = + LAZY_SEEK_ENABLED_BY_DEFAULT; + // if heap == null and lastTop != null, you need to reseek given the key below private KeyValue lastTop = null; + /** An internal constructor. */ + private StoreScanner(Store store, boolean cacheBlocks, Scan scan, + final NavigableSet columns){ + this.store = store; + this.cacheBlocks = cacheBlocks; + isGet = scan.isGetScan(); + int numCol = columns == null ? 0 : columns.size(); + explicitColumnQuery = numCol > 0; + + // We look up row-column Bloom filters for multi-column queries as part of + // the seek operation. However, we also look up the row-column Bloom filter + // for multi-row (non-"get") scans because this is not done in + // StoreFile.passesBloomFilter(Scan, SortedSet). + useRowColBloom = numCol > 1 || (!isGet && numCol == 1); + } + /** * Opens a scanner across memstore, snapshot, and all StoreFiles. 
* @@ -62,22 +88,21 @@ */ StoreScanner(Store store, Scan scan, final NavigableSet columns) throws IOException { - this.store = store; - this.cacheBlocks = scan.getCacheBlocks(); + this(store, scan.getCacheBlocks(), scan, columns); matcher = new ScanQueryMatcher(scan, store.getFamily().getName(), columns, store.ttl, store.comparator.getRawComparator(), store.minVersions, store.versionsToReturn(scan.getMaxVersions()), false, Long.MAX_VALUE); - this.isGet = scan.isGetScan(); - // pass columns = try to filter out unnecessary ScanFiles + // Pass columns to try to filter out unnecessary StoreFiles. List scanners = getScanners(scan, columns); // Seek all scanners to the start of the Row (or if the exact matching row // key does not exist, then to the start of the next matching Row). - if (matcher.isExactColumnQuery()) { - for (KeyValueScanner scanner : scanners) - scanner.seekExactly(matcher.getStartKey(), false); + if (explicitColumnQuery && lazySeekEnabledGlobally) { + for (KeyValueScanner scanner : scanners) { + scanner.requestSeek(matcher.getStartKey(), false, useRowColBloom); + } } else { for (KeyValueScanner scanner : scanners) scanner.seek(matcher.getStartKey()); @@ -102,9 +127,7 @@ StoreScanner(Store store, Scan scan, List scanners, boolean retainDeletesInOutput, long smallestReadPoint) throws IOException { - this.store = store; - this.cacheBlocks = false; - this.isGet = false; + this(store, false, scan, null); matcher = new ScanQueryMatcher(scan, store.getFamily().getName(), null, store.ttl, store.comparator.getRawComparator(), store.minVersions, store.versionsToReturn(scan.getMaxVersions()), retainDeletesInOutput, smallestReadPoint); @@ -124,9 +147,7 @@ final NavigableSet columns, final List scanners) throws IOException { - this.store = null; - this.isGet = false; - this.cacheBlocks = scan.getCacheBlocks(); + this(null, scan.getCacheBlocks(), scan, columns); this.matcher = new ScanQueryMatcher(scan, colFamily, columns, ttl, comparator.getRawComparator(), 0, 
scan.getMaxVersions(), false, Long.MAX_VALUE); @@ -226,7 +247,6 @@ * @return true if there are more rows, false if scanner is done */ public synchronized boolean next(List outResult, int limit) throws IOException { - //DebugPrint.println("SS.next"); if (checkReseek()) { return true; @@ -308,8 +328,8 @@ return false; case SEEK_NEXT_ROW: - // This is just a relatively simple end of scan fix, to short-cut end us if there is a - // endKey in the scan. + // This is just a relatively simple end of scan fix, to short-cut end + // us if there is an endKey in the scan. if (!matcher.moreRowsMayExistAfter(kv)) { outResult.addAll(results); return false; @@ -433,8 +453,11 @@ public synchronized boolean reseek(KeyValue kv) throws IOException { //Heap cannot be null, because this is only called from next() which //guarantees that heap will never be null before this call. - return matcher.isExactColumnQuery() ? heap.seekExactly(kv, true) : - heap.reseek(kv); + if (explicitColumnQuery && lazySeekEnabledGlobally) { + return heap.requestSeek(kv, true, useRowColBloom); + } else { + return heap.reseek(kv); + } } @Override @@ -442,11 +465,6 @@ return 0; } - @Override - public boolean seekExactly(KeyValue kv, boolean forward) throws IOException { - throw new NotImplementedException(); - } - /** * Used in testing. 
* @return all scanners in no particular order @@ -460,4 +478,9 @@ allScanners.add(scanner); return allScanners; } + + static void enableLazySeekGlobally(boolean enable) { + lazySeekEnabledGlobally = enable; + } } + Index: src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java (revision 0) +++ src/main/java/org/apache/hadoop/hbase/regionserver/NonLazyKeyValueScanner.java (revision 0) @@ -0,0 +1,55 @@ +/* + * Copyright 2011 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; + +import org.apache.commons.lang.NotImplementedException; +import org.apache.hadoop.hbase.KeyValue; + +/** + * A "non-lazy" scanner which always does a real seek operation. Most scanners + * are inherited from this class. 
+ */ +public abstract class NonLazyKeyValueScanner implements KeyValueScanner { + + @Override + public boolean requestSeek(KeyValue kv, boolean forward, boolean useBloom) + throws IOException { + return doRealSeek(this, kv, forward); + } + + @Override + public boolean realSeekDone() { + return true; + } + + @Override + public void enforceSeek() throws IOException { + throw new NotImplementedException("enforceSeek must not be called on a " + + "non-lazy scanner"); + } + + public static boolean doRealSeek(KeyValueScanner scanner, + KeyValue kv, boolean forward) throws IOException { + return forward ? scanner.reseek(kv) : scanner.seek(kv); + } + +} Index: src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java (working copy) @@ -632,7 +632,7 @@ * map and snapshot. * This behaves as if it were a real scanner but does not maintain position. 
*/ - protected class MemStoreScanner extends AbstractKeyValueScanner { + protected class MemStoreScanner extends NonLazyKeyValueScanner { // Next row information for either kvset or snapshot private KeyValue kvsetNextRow = null; private KeyValue snapshotNextRow = null; Index: src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/regionserver/StoreFile.java (working copy) @@ -1374,6 +1374,10 @@ void disableBloomFilterForTesting() { bloomFilter = null; } + + public long getMaxTimestamp() { + return timeRangeTracker.maximumTimestamp; + } } /** Index: src/main/java/org/apache/hadoop/hbase/KeyValue.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/KeyValue.java (revision 1230825) +++ src/main/java/org/apache/hadoop/hbase/KeyValue.java (working copy) @@ -1740,6 +1740,21 @@ } /** + * Creates the first KV with the row/family/qualifier of this KV and the + * given timestamp. Uses the "maximum" KV type that guarantees that the new + * KV is the lowest possible for this combination of row, family, qualifier, + * and timestamp. This KV's own timestamp is ignored. While this function + * copies the value from this KV, it is normally used on key-only KVs. + */ + public KeyValue createFirstOnRowColTS(long ts) { + return new KeyValue( + bytes, getRowOffset(), getRowLength(), + bytes, getFamilyOffset(), getFamilyLength(), + bytes, getQualifierOffset(), getQualifierLength(), + ts, Type.Maximum, bytes, getValueOffset(), getValueLength()); + } + + /** * @param b * @return A KeyValue made of a byte array that holds the key-only part. * Needed to convert hfile index members to KeyValues.