Index: src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java (revision 803512) +++ src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java (working copy) @@ -42,7 +42,7 @@ private final Log LOG = LogFactory.getLog(this.getClass()); private MemStore memstore; private static final int ROW_COUNT = 10; - private static final int QUALIFIER_COUNT = 10; + private static final int QUALIFIER_COUNT = ROW_COUNT; private static final byte [] FAMILY = Bytes.toBytes("column"); private static final byte [] CONTENTS_BASIC = Bytes.toBytes("contents:basic"); private static final String CONTENTSTR = "contentstr"; @@ -53,6 +53,63 @@ this.memstore = new MemStore(); } + /** For HBASE-528 */ + public void testGetRowKeyAtOrBefore() { + // set up some test data + byte [] t10 = Bytes.toBytes("010"); + byte [] t20 = Bytes.toBytes("020"); + byte [] t30 = Bytes.toBytes("030"); + byte [] t35 = Bytes.toBytes("035"); + byte [] t40 = Bytes.toBytes("040"); + + memstore.add(getKV(t10, "t10 bytes".getBytes())); + memstore.add(getKV(t20, "t20 bytes".getBytes())); + memstore.add(getKV(t30, "t30 bytes".getBytes())); + memstore.add(getKV(t35, "t35 bytes".getBytes())); + // write a delete in there to see if things still work ok + memstore.add(getDeleteKV(t35)); + memstore.add(getKV(t40, "t40 bytes".getBytes())); + + // try finding "015" + KeyValue t15 = new KeyValue(Bytes.toBytes("015"), + System.currentTimeMillis()); + GetClosestRowBeforeTracker tracker = + new GetClosestRowBeforeTracker(KeyValue.COMPARATOR, t15, Integer.MAX_VALUE); + memstore.getRowKeyAtOrBefore(tracker); + KeyValue kv = tracker.getCandidate(); + assertTrue(KeyValue.COMPARATOR.compareRows(kv, t10) == 0); + + // try "020", we should get that row exactly + results = + new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); + memstore.getRowKeyAtOrBefore(new KeyValue(t20, System.currentTimeMillis()), + results); + assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t20) == 0); + + // try "030", we should get that row exactly + results = + new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); + memstore.getRowKeyAtOrBefore(new KeyValue(t30, System.currentTimeMillis()), + results); + assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t30) == 0); + + // try "038", should skip the deleted "035" and give "030" + results = + new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); + byte [] t38 = Bytes.toBytes("038"); + memstore.getRowKeyAtOrBefore(new KeyValue(t38, System.currentTimeMillis()), + results); + assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t30) == 0); + + // try "050", should get stuff from "040" + results = + new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); + byte [] t50 = Bytes.toBytes("050"); + memstore.getRowKeyAtOrBefore(new KeyValue(t50, System.currentTimeMillis()), + results); + assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t40) == 0); + } + public void testPutSameKey() { byte [] bytes = Bytes.toBytes(getName()); KeyValue kv = new KeyValue(bytes, bytes, bytes, bytes); @@ -82,6 +139,8 @@ while (s.next(result)) { LOG.info(result); count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); result.clear(); } } finally { @@ -98,6 +157,8 @@ // Assert the stuff is coming out in right order. assertTrue(Bytes.compareTo(Bytes.toBytes(count), result.get(0).getRow()) == 0); count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); if (count == 2) { this.memstore.snapshot(); LOG.info("Snapshotted"); @@ -108,6 +169,34 @@ s.close(); } assertEquals(rowCount, count); + // Assert that new values are seen in kvset as we scan. + long ts = System.currentTimeMillis(); + s = new StoreScanner(scan, null, HConstants.LATEST_TIMESTAMP, + this.memstore.comparator, null, memstorescanners); + count = 0; + int snapshotIndex = 5; + try { + while (s.next(result)) { + LOG.info(result); + // Assert the stuff is coming out in right order. + assertTrue(Bytes.compareTo(Bytes.toBytes(count), result.get(0).getRow()) == 0); + // Row count is same as column count. + assertEquals("count=" + count + ", result=" + result, + rowCount, result.size()); + count++; + if (count == snapshotIndex) { + this.memstore.snapshot(); + this.memstore.clearSnapshot(this.memstore.getSnapshot()); + // Added more rows into kvset. + addRows(this.memstore, ts); + LOG.info("Snapshotted, cleared it and then added values"); + } + result.clear(); + } + } finally { + s.close(); + } + assertEquals(rowCount, count); } /** @@ -126,7 +215,7 @@ } public void testMultipleVersionsSimple() throws Exception { - MemStore m = new MemStore(HConstants.FOREVER, KeyValue.COMPARATOR); + MemStore m = new MemStore(KeyValue.COMPARATOR); byte [] row = Bytes.toBytes("testRow"); byte [] family = Bytes.toBytes("testFamily"); byte [] qf = Bytes.toBytes("testQualifier"); @@ -146,7 +235,7 @@ } public void testBinary() throws IOException { - MemStore mc = new MemStore(HConstants.FOREVER, KeyValue.ROOT_COMPARATOR); + MemStore mc = new MemStore(KeyValue.ROOT_COMPARATOR); final int start = 43; final int end = 46; for (int k = start; k <= end; k++) { @@ -180,66 +269,7 @@ ////////////////////////////////////////////////////////////////////////////// // Get tests ////////////////////////////////////////////////////////////////////////////// - /** For HBASE-528 */ - public void testGetRowKeyAtOrBefore() { - // set up some test data - byte [] t10 = Bytes.toBytes("010"); - byte [] t20 = Bytes.toBytes("020"); - byte [] t30 = Bytes.toBytes("030"); - byte [] t35 = Bytes.toBytes("035"); - byte [] t40 = Bytes.toBytes("040"); - - memstore.add(getKV(t10, "t10 bytes".getBytes())); - memstore.add(getKV(t20, "t20 bytes".getBytes())); - memstore.add(getKV(t30, "t30 bytes".getBytes())); - memstore.add(getKV(t35, "t35 bytes".getBytes())); - // write a delete in there to see if things still work ok - memstore.add(getDeleteKV(t35)); - memstore.add(getKV(t40, "t40 bytes".getBytes())); - - NavigableSet results = null; - - // try finding "015" - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - KeyValue t15 = new KeyValue(Bytes.toBytes("015"), - System.currentTimeMillis()); - memstore.getRowKeyAtOrBefore(t15, results); - KeyValue kv = results.last(); - assertTrue(KeyValue.COMPARATOR.compareRows(kv, t10) == 0); - // try "020", we should get that row exactly - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - memstore.getRowKeyAtOrBefore(new KeyValue(t20, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t20) == 0); - - // try "030", we should get that row exactly - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - memstore.getRowKeyAtOrBefore(new KeyValue(t30, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t30) == 0); - - // try "038", should skip the deleted "035" and give "030" - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - byte [] t38 = Bytes.toBytes("038"); - memstore.getRowKeyAtOrBefore(new KeyValue(t38, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t30) == 0); - - // try "050", should get stuff from "040" - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - byte [] t50 = Bytes.toBytes("050"); - memstore.getRowKeyAtOrBefore(new KeyValue(t50, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t40) == 0); - } - - /** Test getNextRow from memstore * @throws InterruptedException */ @@ -606,8 +636,19 @@ * @throws IOException */ private int addRows(final MemStore hmc) { + return addRows(hmc, HConstants.LATEST_TIMESTAMP); + } + + /** + * Adds {@link #ROW_COUNT} rows and {@link #COLUMNS_COUNT} + * @param hmc Instance to add rows to. + * @return How many rows we added. + * @throws IOException + */ + private int addRows(final MemStore hmc, final long ts) { for (int i = 0; i < ROW_COUNT; i++) { - long timestamp = System.currentTimeMillis(); + long timestamp = ts == HConstants.LATEST_TIMESTAMP? + System.currentTimeMillis(): ts; for (int ii = 0; ii < QUALIFIER_COUNT; ii++) { byte [] row = Bytes.toBytes(i); byte [] qf = makeQualifier(i, ii); Index: src/java/org/apache/hadoop/hbase/regionserver/GetClosestRowBeforeTracker.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/GetClosestRowBeforeTracker.java (revision 0) +++ src/java/org/apache/hadoop/hbase/regionserver/GetClosestRowBeforeTracker.java (revision 0) @@ -0,0 +1,166 @@ +/* + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.NavigableMap; +import java.util.TreeMap; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KVComparator; + +/** + * State processing {@link HRegion#getClosestRowBefore(byte[], byte[])}. + * Like {@link GetDeleteTracker} and {@link ScanDeleteTracker} but does not + * implement the {@link DeleteTracker} interface since the below can span row. + */ +class GetClosestRowBeforeTracker { + private final long ts = System.currentTimeMillis(); + private final KeyValue targetkey; + private final long ttl; + private KeyValue candidate = null; + private final KeyValue.RowComparator rc; + + // Deletes keyed row. Uses a comparator that compares on row portion of + // the KeyValue only. + private final NavigableMap deletes; + + /** + * @param c + * @param kv Presume first on row: i.e. empty column and empty ts. + * @param ttl Time to live in ms. + */ + GetClosestRowBeforeTracker(final KVComparator c, final KeyValue kv, + final long ttl) { + super(); + this.targetkey = kv; + this.ttl = ttl; + this.rc = new KeyValue.RowComparator(c); + this.deletes = new TreeMap(this.rc); + } + + /** + * @param kv + * @return True if this kv is expired. + */ + boolean isExpired(final KeyValue kv) { + return kv.getTimestamp() < (this.ts - this.ttl); + } + + /* + * Add the specified KeyValue to the list of deletes. + * @param kv + */ + private void addDelete(final KeyValue kv) { + GetDeleteTracker tracker = this.deletes.get(kv); + if (tracker == null) { + tracker = new GetDeleteTracker(); + this.deletes.put(kv, tracker); + } + tracker.add(kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength(), + kv.getTimestamp(), kv.getType()); + } + + /* + * @param kv Adds candidate if nearer the target than previous candidate. + * @return True if updated candidate. + */ + private boolean addCandidate(final KeyValue kv) { + if (isBetterCandidate(kv)) { + this.candidate = kv; + return true; + } + return false; + } + + boolean isBetterCandidate(final KeyValue contender) { + return this.candidate == null || + (this.rc.compare(this.candidate, contender) < 0 && + this.rc.compare(contender, this.targetkey) <= 0); + } + + /* + * Check if the specified KeyValue buffer has been deleted by a previously + * seen delete. + * @param buffer KeyValue buffer + * @param qualifierOffset column qualifier offset + * @param qualifierLength column qualifier length + * @param timestamp timestamp + * @return true is the specified KeyValue is deleted, false if not + */ + private boolean isDeleted(final KeyValue kv) { + if (this.deletes.isEmpty()) return false; + GetDeleteTracker tracker = this.deletes.get(kv); + if (tracker == null || tracker.isEmpty()) return false; + return tracker.isDeleted(kv.getBuffer(), kv.getQualifierOffset(), + kv.getQualifierLength(), kv.getTimestamp()); + } + + /* + * Handle keys whose values hold deletes. + * Add to the set of deletes and then if the candidate keys contain any that + * might match, then check for a match and remove it. Implies candidates + * is made with a Comparator that ignores key type. + * @param kv + * @return True if we removed k from candidates. + */ + boolean handleDeletes(final KeyValue kv) { + addDelete(kv); + boolean deleted = false; + if (!hasCandidate()) return deleted; + // We could encounter deletes when walking the memstore in reverse so check + // the just added delete doesn't overwrite the candidate? + if (isDeleted(this.candidate)) { + this.candidate = null; + deleted = true; + } + return deleted; + } + + /** + * Do right thing with passed key, add to deletes or add to candidates. + * @param kv + * @return True if we added a candidate + */ + boolean handle(final KeyValue kv) { + if (kv.isDelete()) { + handleDeletes(kv); + return false; + } + return addCandidate(kv); + } + + /** + * @return True if has candidate + */ + public boolean hasCandidate() { + return this.candidate != null; + } + + /** + * @return Best candidate or null. + */ + public KeyValue getCandidate() { + return this.candidate; + } + + public KeyValue getTargetKey() { + return this.targetkey; + } +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (working copy) @@ -145,7 +145,6 @@ * @return true if there are more rows, false if scanner is done */ public synchronized boolean next(List outResult) throws IOException { - List results = new ArrayList(); KeyValue peeked = this.heap.peek(); if (peeked == null) { close(); @@ -153,6 +152,7 @@ } matcher.setRow(peeked.getRow()); KeyValue kv; + List results = new ArrayList(); while((kv = this.heap.peek()) != null) { QueryMatcher.MatchCode qcode = matcher.match(kv); switch(qcode) { @@ -162,7 +162,6 @@ continue; case DONE: - // copy jazz outResult.addAll(results); return true; @@ -198,7 +197,6 @@ if (!results.isEmpty()) { // copy jazz outResult.addAll(results); - return true; } Index: src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java (working copy) @@ -38,8 +38,8 @@ * This class is NOT thread-safe as queries are never multi-threaded */ public class GetDeleteTracker implements DeleteTracker { - - private long familyStamp = -1L; + private static long UNSET = -1L; + private long familyStamp = UNSET; protected List deletes = null; private List newDeletes = new ArrayList(); private Iterator iterator; @@ -64,7 +64,7 @@ @Override public void add(byte [] buffer, int qualifierOffset, int qualifierLength, long timestamp, byte type) { - if(type == KeyValue.Type.DeleteFamily.getCode()) { + if (type == KeyValue.Type.DeleteFamily.getCode()) { if(timestamp > familyStamp) { familyStamp = timestamp; } @@ -88,7 +88,6 @@ @Override public boolean isDeleted(byte [] buffer, int qualifierOffset, int qualifierLength, long timestamp) { - // Check against DeleteFamily if (timestamp <= familyStamp) { return true; @@ -103,7 +102,7 @@ int ret = Bytes.compareTo(buffer, qualifierOffset, qualifierLength, this.delete.buffer, this.delete.qualifierOffset, this.delete.qualifierLength); - if(ret <= -1) { + if (ret <= -1) { // Have not reached the next delete yet return false; } else if(ret >= 1) { @@ -149,7 +148,7 @@ @Override public boolean isEmpty() { - if(this.familyStamp == 0L && this.delete == null) { + if(this.familyStamp == UNSET && this.delete == null) { return true; } return false; @@ -160,7 +159,7 @@ this.deletes = null; this.delete = null; this.newDeletes = new ArrayList(); - this.familyStamp = 0L; + this.familyStamp = UNSET; this.iterator = null; } Index: src/java/org/apache/hadoop/hbase/regionserver/MemStore.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/MemStore.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/regionserver/MemStore.java (working copy) @@ -29,13 +29,11 @@ import java.util.List; import java.util.NavigableSet; import java.util.SortedSet; -import java.util.TreeSet; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.regionserver.DeleteCompare.DeleteCode; @@ -56,8 +54,6 @@ public class MemStore implements HeapSize { private static final Log LOG = LogFactory.getLog(MemStore.class); - private final long ttl; - // MemStore. Use a KeyValueSkipListSet rather than SkipListSet because of the // better semantics. The Map will overwrite if passed a key it already had // whereas the Set will not add new KV if key is same though value might be @@ -85,7 +81,7 @@ * Default constructor. Used for tests. */ public MemStore() { - this(HConstants.FOREVER, KeyValue.COMPARATOR); + this(KeyValue.COMPARATOR); } /** @@ -93,8 +89,7 @@ * @param ttl The TTL for cache entries, in milliseconds. * @param c */ - public MemStore(final long ttl, final KeyValue.KVComparator c) { - this.ttl = ttl; + public MemStore(final KeyValue.KVComparator c) { this.comparator = c; this.comparatorIgnoreTimestamp = this.comparator.getComparatorIgnoringTimestamps(); @@ -325,200 +320,94 @@ return result; } - /** - * @param row Row to look for. - * @param candidateKeys Map of candidate keys (Accumulation over lots of - * lookup over stores and memstores) + * @param state */ - void getRowKeyAtOrBefore(final KeyValue row, - final NavigableSet candidateKeys) { - getRowKeyAtOrBefore(row, candidateKeys, - new TreeSet(this.comparator), System.currentTimeMillis()); - } - - /** - * @param kv Row to look for. - * @param candidates Map of candidate keys (Accumulation over lots of - * lookup over stores and memstores). Pass a Set with a Comparator that - * ignores key Type so we can do Set.remove using a delete, i.e. a KeyValue - * with a different Type to the candidate key. - * @param deletes Pass a Set that has a Comparator that ignores key type. - * @param now - */ - void getRowKeyAtOrBefore(final KeyValue kv, - final NavigableSet candidates, - final NavigableSet deletes, final long now) { + void getRowKeyAtOrBefore(final GetClosestRowBeforeTracker state) { this.lock.readLock().lock(); try { - getRowKeyAtOrBefore(kvset, kv, candidates, deletes, now); - getRowKeyAtOrBefore(snapshot, kv, candidates, deletes, now); + getRowKeyAtOrBefore(kvset, state); + getRowKeyAtOrBefore(snapshot, state); } finally { this.lock.readLock().unlock(); } } + /* + * @param set + * @param state Accumulates deletes and candidates. + */ private void getRowKeyAtOrBefore(final NavigableSet set, - final KeyValue kv, final NavigableSet candidates, - final NavigableSet deletes, final long now) { + final GetClosestRowBeforeTracker state) { if (set.isEmpty()) { return; } - // We want the earliest possible to start searching from. Start before - // the candidate key in case it turns out a delete came in later. - KeyValue search = candidates.isEmpty()? kv: candidates.first(); + // + if (!walkForwardInSingleRow(set, state.getTargetKey(), state)) { + // Found nothing in row. Try backing up. + getRowKeyBefore(set, state); + } + } - // Get all the entries that come equal or after our search key - SortedSet tail = set.tailSet(search); - - // if there are items in the tail map, there's either a direct match to - // the search key, or a range of values between the first candidate key - // and the ultimate search key (or the end of the cache) - if (!tail.isEmpty() && - this.comparator.compareRows(tail.first(), search) <= 0) { - // Keep looking at cells as long as they are no greater than the - // ultimate search key and there's still records left in the map. - KeyValue deleted = null; - KeyValue found = null; - for (Iterator iterator = tail.iterator(); - iterator.hasNext() && (found == null || - this.comparator.compareRows(found, kv) <= 0);) { - found = iterator.next(); - if (this.comparator.compareRows(found, kv) <= 0) { - if (found.isDeleteType()) { - Store.handleDeletes(found, candidates, deletes); - if (deleted == null) { - deleted = found; - } - } else { - if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) { - candidates.add(found); - } else { - if (deleted == null) { - deleted = found; - } - // TODO: Check this removes the right key. - // Its expired. Remove it. - iterator.remove(); - } - } - } + private boolean walkForwardInSingleRow(final NavigableSet set, + final KeyValue firstOnRow, final GetClosestRowBeforeTracker state) { + boolean foundCandidate = false; + // Target key is first key on a row -- no column and latest timestamp. + SortedSet tail = set.tailSet(firstOnRow); + if (tail.isEmpty()) return foundCandidate; + for (Iterator i = tail.iterator(); i.hasNext();) { + KeyValue kv = i.next(); + // Did we go beyond the target row? If so break. + if (isTooFar(kv, firstOnRow)) break; + if (state.isExpired(kv)) { + i.remove(); + continue; } - if (candidates.isEmpty() && deleted != null) { - getRowKeyBefore(set, deleted, candidates, deletes, now); + // If we added something, this row is a contender. break. + if (state.handle(kv)) { + foundCandidate = true; + break; } - } else { - // The tail didn't contain any keys that matched our criteria, or was - // empty. Examine all the keys that proceed our splitting point. - getRowKeyBefore(set, search, candidates, deletes, now); } + return foundCandidate; } /* - * Get row key that comes before passed search_key - * Use when we know search_key is not in the map and we need to search - * earlier in the cache. + * @param kv Current kv + * @param First on row kv. + * @param state + * @return True if we went too far, past the target key. + */ + private boolean isTooFar(final KeyValue kv, final KeyValue firstOnRow) { + return this.comparator.compareRows(kv, firstOnRow) > 0; + } + + /* + * Walk backwards through the passed set. * @param set - * @param search - * @param candidates - * @param deletes Pass a Set that has a Comparator that ignores key type. - * @param now + * @param state */ private void getRowKeyBefore(NavigableSet set, - KeyValue search, NavigableSet candidates, - final NavigableSet deletes, final long now) { - NavigableSet head = set.headSet(search, false); + final GetClosestRowBeforeTracker state) { + // Search backward from target row. + NavigableSet head = set.headSet(state.getTargetKey(), false); // If we tried to create a headMap and got an empty map, then there are // no keys at or before the search key, so we're done. if (head.isEmpty()) { return; } - - // If there aren't any candidate keys at this point, we need to search - // backwards until we find at least one candidate or run out of headMap. - if (candidates.isEmpty()) { - KeyValue lastFound = null; - // TODO: Confirm we're iterating in the right order - for (Iterator i = head.descendingIterator(); - i.hasNext();) { - KeyValue found = i.next(); - // if the last row we found a candidate key for is different than - // the row of the current candidate, we can stop looking -- if its - // not a delete record. - boolean deleted = found.isDeleteType(); - if (lastFound != null && - this.comparator.matchingRows(lastFound, found) && !deleted) { - break; - } - // If this isn't a delete, record it as a candidate key. Also - // take note of this candidate so that we'll know when - // we cross the row boundary into the previous row. - if (!deleted) { - if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) { - lastFound = found; - candidates.add(found); - } else { - // Its expired. - Store.expiredOrDeleted(set, found); - } - } else { - // We are encountering items in reverse. We may have just added - // an item to candidates that this later item deletes. Check. If we - // found something in candidates, remove it from the set. - if (Store.handleDeletes(found, candidates, deletes)) { - remove(set, found); - } - } - } - } else { - // If there are already some candidate keys, we only need to consider - // the very last row's worth of keys in the headMap, because any - // smaller acceptable candidate keys would have caused us to start - // our search earlier in the list, and we wouldn't be searching here. - SortedSet rowTail = - head.tailSet(head.last().cloneRow(HConstants.LATEST_TIMESTAMP)); - Iterator i = rowTail.iterator(); - do { - KeyValue found = i.next(); - if (found.isDeleteType()) { - Store.handleDeletes(found, candidates, deletes); - } else { - if (ttl == HConstants.FOREVER || - now < found.getTimestamp() + ttl || - !deletes.contains(found)) { - candidates.add(found); - } else { - Store.expiredOrDeleted(set, found); - } - } - } while (i.hasNext()); - } - } - - - /* - * @param set - * @param delete This is a delete record. Remove anything behind this of same - * r/c/ts. - * @return True if we removed anything. - */ - private boolean remove(final NavigableSet set, - final KeyValue delete) { - SortedSet s = set.tailSet(delete); - if (s.isEmpty()) { - return false; - } - boolean removed = false; - for (KeyValue kv: s) { - if (this.comparatorIgnoreType.compare(kv, delete) == 0) { - // Same r/c/ts. Remove it. - s.remove(kv); - removed = true; + for (Iterator i = head.descendingIterator(); i.hasNext();) { + KeyValue found = i.next(); + if (state.isExpired(found)) { + i.remove(); continue; } - break; + KeyValue firstOnRow = found.cloneRow(); + // Stop looking if we're not in the better candidate range. + if (!state.isBetterCandidate(firstOnRow)) break; + // If we find something, break; + if (walkForwardInSingleRow(head, firstOnRow, state)) break; } - return removed; } /** @@ -527,9 +416,8 @@ KeyValueScanner [] getScanners() { this.lock.readLock().lock(); try { - KeyValueScanner [] scanners = new KeyValueScanner[2]; - scanners[0] = new MemStoreScanner(this.kvset); - scanners[1] = new MemStoreScanner(this.snapshot); + KeyValueScanner [] scanners = new KeyValueScanner[1]; + scanners[0] = new MemStoreScanner(); return scanners; } finally { this.lock.readLock().unlock(); @@ -603,18 +491,17 @@ /* * MemStoreScanner implements the KeyValueScanner. - * It lets the caller scan the contents of a memstore. - * This behaves as if it were a real scanner but does not maintain position - * in the passed memstore tree. + * It lets the caller scan the contents of a memstore -- both current + * map and snapshot. + * This behaves as if it were a real scanner but does not maintain position. */ protected class MemStoreScanner implements KeyValueScanner { - private final NavigableSet kvs; - private KeyValue current = null; private List result = new ArrayList(); private int idx = 0; + private KeyValue firstOnNextRow = null; - MemStoreScanner(final NavigableSet s) { - this.kvs = s; + MemStoreScanner() { + super(); } public boolean seek(KeyValue key) { @@ -623,7 +510,7 @@ close(); return false; } - this.current = key; + this.firstOnNextRow = key; return cacheNextRow(); } catch(Exception e) { close(); @@ -652,35 +539,66 @@ } /** - * @return True if we successfully cached a NavigableSet aligned on - * next row. + * @return True if successfully cached a next row. */ boolean cacheNextRow() { - SortedSet keys; + this.result.clear(); + this.idx = 0; + // Prevent snapshot being cleared while caching a row. + lock.readLock().lock(); try { - keys = this.kvs.tailSet(this.current); - } catch (Exception e) { - close(); - return false; + // Look at each set, kvset and snapshot. + // Both look for matching entries for this.current row returning what + // they + // have as next row after this.current (or null if nothing in set or if + // nothing follows. + KeyValue kvsetNextRow = cacheNextRow(kvset); + KeyValue snapshotNextRow = cacheNextRow(snapshot); + if (kvsetNextRow == null && snapshotNextRow == null) { + // Nothing more in memstore but we might have gotten current row + // results + // Indicate at end of store by setting next row to null. + this.firstOnNextRow = null; + return !this.result.isEmpty(); + } else if (kvsetNextRow != null && snapshotNextRow != null) { + // Set current at the lowest of the two values. + int compare = comparator.compare(kvsetNextRow, snapshotNextRow); + this.firstOnNextRow = (compare <= 0) ? kvsetNextRow : snapshotNextRow; + } else { + this.firstOnNextRow = kvsetNextRow != null ? kvsetNextRow + : snapshotNextRow; + } + return true; + } finally { + lock.readLock().unlock(); } - if (keys == null || keys.isEmpty()) { - close(); - return false; - } - this.current = null; - byte [] row = keys.first().getRow(); - for (KeyValue kv: keys) { - if (comparator.compareRows(kv, row) != 0) { - this.current = kv; + } + + /* + * See if set has entries for the this.current row. If so, + * add them to this.result. + * @param set Set to examine + * @return Next row in passed set or null if nothing in this + * passed set + */ + private KeyValue cacheNextRow(final NavigableSet set) { + if (this.firstOnNextRow == null || set.isEmpty()) return null; + SortedSet tail = set.tailSet(this.firstOnNextRow); + if (tail == null || tail.isEmpty()) return null; + KeyValue first = tail.first(); + KeyValue nextRow = null; + for (KeyValue kv: tail) { + if (comparator.compareRows(first, kv) != 0) { + nextRow = kv; break; } - result.add(kv); + this.result.add(kv); } - return true; + return nextRow; } public void close() { - current = null; + firstOnNextRow = null; idx = 0; if (!result.isEmpty()) { result.clear(); @@ -764,4 +682,4 @@ } LOG.info("Exiting."); } -} +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -1022,9 +1022,8 @@ * @return map of values * @throws IOException */ - public Result getClosestRowBefore(final byte [] row, - final byte [] family) - throws IOException{ + public Result getClosestRowBefore(final byte [] row, final byte [] family) + throws IOException { // look across all the HStores for this region and determine what the // closest key is across all column families, since the data may be sparse KeyValue key = null; @@ -1046,7 +1045,7 @@ } Get get = new Get(key.getRow()); store.get(get, null, results); - + LOG.info("GETCLOSEST: " + kv.toString() + " " + key.toString()); return new Result(results); } finally { splitsAndClosesLock.readLock().unlock(); @@ -1161,7 +1160,7 @@ for (KeyValue kv: kvs) { // Check if time is LATEST, change to time of most recent addition if so // This is expensive. - if (kv.isLatestTimestamp() && kv.isDeleteType()) { + if (kv.isLatestTimestamp() && kv.isDelete()) { List result = new ArrayList(1); Get g = new Get(kv.getRow()); NavigableSet qualifiers = @@ -2430,6 +2429,7 @@ if (majorCompact) { region.compactStores(true); } else { + /* // Default behavior Scan scan = new Scan(); InternalScanner scanner = region.getScanner(scan); @@ -2443,7 +2443,8 @@ } while (done); } finally { scanner.close(); - } + }*/ + System.out.println(region.getClosestRowBefore(Bytes.toBytes("GeneratedCSVContent2,844EE3A3A260566C6A10D027C92D3796,9993372036854775807"))); } } finally { region.close(); @@ -2481,7 +2482,6 @@ printUsageAndExit("ERROR: Unrecognized option <" + args[1] + ">"); } majorCompact = true; - } Path tableDir = new Path(args[0]); HBaseConfiguration c = new HBaseConfiguration(); Index: src/java/org/apache/hadoop/hbase/regionserver/Store.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/Store.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/regionserver/Store.java (working copy) @@ -181,7 +181,7 @@ // second -> ms adjust for user data this.ttl *= 1000; } - this.memstore = new MemStore(this.ttl, this.comparator); + this.memstore = new MemStore(this.comparator); this.regionCompactionDir = new Path(HRegion.getCompactionDir(basedir), Integer.toString(info.getEncodedName())); this.storeName = this.family.getName(); @@ -1031,47 +1031,32 @@ /** * Find the key that matches row exactly, or the one that immediately * preceeds it. WARNING: Only use this method on a table where writes occur - * with stricly increasing timestamps. This method assumes this pattern of - * writes in order to make it reasonably performant. - * @param targetkey - * @return Found keyvalue + * with strictly increasing timestamps. This method assumes this pattern of + * writes in order to make it reasonably performant. Also dependent on the + * axiom that deletes are for cells that are in the container that follows + * whether a memstore snapshot or a storefile, not for the current container: + * i.e. we'll see deletes before we come across cells we are to delete. + * Presumption is that the memstore#kvset is processed before memstore#snapshot + * and so on. + * @param kv First possible item on row. + * @return Found keyvalue or null if none found. * @throws IOException */ - KeyValue getRowKeyAtOrBefore(final KeyValue targetkey) - throws IOException{ - // Map of keys that are candidates for holding the row key that - // most closely matches what we're looking for. We'll have to update it as - // deletes are found all over the place as we go along before finally - // reading the best key out of it at the end. Use a comparator that - // ignores key types. Otherwise, we can't remove deleted items doing - // set.remove because of the differing type between insert and delete. - NavigableSet candidates = - new TreeSet(this.comparator.getComparatorIgnoringType()); - - // Keep a list of deleted cell keys. We need this because as we go through - // the store files, the cell with the delete marker may be in one file and - // the old non-delete cell value in a later store file. If we don't keep - // around the fact that the cell was deleted in a newer record, we end up - // returning the old value if user is asking for more than one version. - // This List of deletes should not be large since we are only keeping rows - // and columns that match those set on the scanner and which have delete - // values. If memory usage becomes an issue, could redo as bloom filter. - NavigableSet deletes = - new TreeSet(this.comparatorIgnoringType); - long now = System.currentTimeMillis(); + KeyValue getRowKeyAtOrBefore(final KeyValue kv) + throws IOException { + GetClosestRowBeforeTracker state = + new GetClosestRowBeforeTracker(this.comparator, kv, this.ttl); this.lock.readLock().lock(); try { // First go to the memstore. Pick up deletes and candidates. - this.memstore.getRowKeyAtOrBefore(targetkey, candidates, deletes, now); + this.memstore.getRowKeyAtOrBefore(state); // Process each store file. Run through from newest to oldest. Map m = this.storefiles.descendingMap(); for (Map.Entry e: m.entrySet()) { // Update the candidate keys from the current map file - rowAtOrBeforeFromStoreFile(e.getValue(), targetkey, candidates, - deletes, now); + rowAtOrBeforeFromStoreFile(e.getValue(), state); } - // Return the best key from candidateKeys - return candidates.isEmpty()? null: candidates.last(); + return state.getCandidate(); } finally { this.lock.readLock().unlock(); } @@ -1081,54 +1066,33 @@ * Check an individual MapFile for the row at or before a given key * and timestamp * @param f - * @param targetkey - * @param candidates Pass a Set with a Comparator that - * ignores key Type so we can do Set.remove using a delete, i.e. a KeyValue - * with a different Type to the candidate key. + * @param state * @throws IOException */ private void rowAtOrBeforeFromStoreFile(final StoreFile f, - final KeyValue targetkey, final NavigableSet candidates, - final NavigableSet deletes, final long now) + final GetClosestRowBeforeTracker state) throws IOException { // if there aren't any candidate keys yet, we'll do some things different - if (candidates.isEmpty()) { - rowAtOrBeforeCandidate(f, targetkey, candidates, deletes, now); + if (!state.hasCandidate()) { + rowAtOrBeforeCandidate(f, state); } else { - rowAtOrBeforeWithCandidates(f, targetkey, candidates, deletes, now); + rowAtOrBeforeWithCandidates(f, state); } } - /* - * @param ttlSetting - * @param hsk - * @param now - * @param deletes A Set whose Comparator ignores Type. - * @return True if key has not expired and is not in passed set of deletes. - */ - static boolean notExpiredAndNotInDeletes(final long ttl, - final KeyValue key, final long now, final Set deletes) { - return !isExpired(key, now-ttl) && (deletes == null || deletes.isEmpty() || - !deletes.contains(key)); - } - static boolean isExpired(final KeyValue key, final long oldestTimestamp) { return key.getTimestamp() < oldestTimestamp; } /* Find a candidate for row that is at or before passed key, searchkey, in hfile. * @param f - * @param targetkey Key to go search the hfile with. - * @param candidates - * @param now + * @param state * @throws IOException * @see {@link #rowAtOrBeforeCandidate(HStoreKey, org.apache.hadoop.io.MapFile.Reader, byte[], SortedMap, long)} */ private void rowAtOrBeforeCandidate(final StoreFile f, - final KeyValue targetkey, final NavigableSet candidates, - final NavigableSet deletes, final long now) + final GetClosestRowBeforeTracker state) throws IOException { - KeyValue search = targetkey; // If the row we're looking for is past the end of this mapfile, set the // search key to be the last key. If its a deleted key, then we'll back // up to the row before and return that. @@ -1141,7 +1105,8 @@ byte [] lastkey = r.getLastKey(); KeyValue lastKeyValue = KeyValue.createKeyValueFromKey(lastkey, 0, lastkey.length); - if (this.comparator.compareRows(lastKeyValue, targetkey) < 0) { + KeyValue search = state.getTargetKey(); + if (this.comparator.compareRows(lastKeyValue, search) < 0) { search = lastKeyValue; } KeyValue knownNoGoodKey = null; @@ -1159,7 +1124,7 @@ do { kv = scanner.getKeyValue(); if (this.comparator.compareRows(kv, search) <= 0) { - if (!kv.isDeleteType()) { + if (!kv.isDelete()) { if (handleNonDelete(kv, now, deletes, candidates)) { foundCandidate = true; // NOTE! Continue. @@ -1178,7 +1143,7 @@ // So, the row key doesn't match, but we haven't gone past the row // we're seeking yet, so this row is a candidate for closest // (assuming that it isn't a delete). - if (!kv.isDeleteType()) { + if (!kv.isDelete()) { if (handleNonDelete(kv, now, deletes, candidates)) { foundCandidate = true; // NOTE: Continue @@ -1216,9 +1181,7 @@ } private void rowAtOrBeforeWithCandidates(final StoreFile f, - final KeyValue targetkey, - final NavigableSet candidates, - final NavigableSet deletes, final long now) + final GetClosestRowBeforeTracker state) throws IOException { // if there are already candidate keys, we need to start our search // at the earliest possible key so that we can discover any possible @@ -1265,59 +1228,7 @@ } while(scanner.next()); } - /* - * Used calculating keys at or just before a passed key. - * @param readkey - * @param now - * @param deletes Set with Comparator that ignores key type. - * @param candidate Set with Comprator that ignores key type. - */ - private void handleKey(final KeyValue readkey, final long now, - final NavigableSet deletes, - final NavigableSet candidates) { - if (!readkey.isDeleteType()) { - handleNonDelete(readkey, now, deletes, candidates); - } else { - handleDeletes(readkey, candidates, deletes); - } - } - - /* - * Used calculating keys at or just before a passed key. - * @param readkey - * @param now - * @param deletes Set with Comparator that ignores key type. - * @param candidates Set with Comparator that ignores key type. - * @return True if we added a candidate. - */ - private boolean handleNonDelete(final KeyValue readkey, final long now, - final NavigableSet deletes, - final NavigableSet candidates) { - if (notExpiredAndNotInDeletes(this.ttl, readkey, now, deletes)) { - candidates.add(readkey); - return true; - } - return false; - } - /** - * Handle keys whose values hold deletes. - * Add to the set of deletes and then if the candidate keys contain any that - * might match, then check for a match and remove it. Implies candidates - * is made with a Comparator that ignores key type. - * @param k - * @param candidates - * @param deletes - * @return True if we removed k from candidates. - */ - static boolean handleDeletes(final KeyValue k, - final NavigableSet candidates, - final NavigableSet deletes) { - deletes.add(k); - return candidates.remove(k); - } - - /** * Determines if HStore can be split * @param force Whether to force a split or not. * @return a StoreSize if store can be split, null otherwise. Index: src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java =================================================================== --- src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (working copy) @@ -230,7 +230,7 @@ public boolean filterRow(List kvs) { for (KeyValue kv: kvs) { byte [] column = kv.getColumn(); - if (nullColumns.contains(column) && !kv.isDeleteType()) { + if (nullColumns.contains(column) && !kv.isDelete()) { return true; } if (!equalsMap.containsKey(column)) { Index: src/java/org/apache/hadoop/hbase/KeyValue.java =================================================================== --- src/java/org/apache/hadoop/hbase/KeyValue.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/KeyValue.java (working copy) @@ -22,6 +22,7 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import java.util.Comparator; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -128,7 +129,7 @@ return compare(a, 0, a.length, b, 0, b.length); } }; - + /** * Get the appropriate row comparator for the specified table. * @@ -291,11 +292,12 @@ /** * Constructs KeyValue structure filled with null value. + * Sets type to {@link KeyValue.Type#Maximum} * @param row - row key (arbitrary byte array) * @param timestamp */ public KeyValue(final byte [] row, final long timestamp) { - this(row, timestamp, Type.Put); + this(row, timestamp, Type.Maximum); } /** @@ -309,13 +311,14 @@ /** * Constructs KeyValue structure filled with null value. + * Sets type to {@link KeyValue.Type#Maximum} * @param row - row key (arbitrary byte array) * @param family family name * @param qualifier column qualifier */ public KeyValue(final byte [] row, final byte [] family, final byte [] qualifier) { - this(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Put); + this(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum); } /** @@ -569,21 +572,30 @@ * @return Fully copied clone of this KeyValue */ public KeyValue clone() { - byte [] bytes = new byte[this.length]; - System.arraycopy(this.bytes, this.offset, bytes, 0, this.length); - return new KeyValue(bytes, 0, bytes.length); + byte [] b = new byte[this.length]; + System.arraycopy(this.bytes, this.offset, b, 0, this.length); + return new KeyValue(b, 0, b.length); } /** * Clones a row. + * @return Clone of bb's key portion with only row and + * {@link HConstants#LATEST_TIMESTAMP} filled in. + */ + public KeyValue cloneRow() { + return cloneRow(HConstants.LATEST_TIMESTAMP); + } + + /** + * Clones a row. * * @param timestamp The new time stamp for the row. - * @return Clone of bb's key portion with only the row and timestamp filled in. + * @return Clone of bb's key portion with only row and timestamp filled in. */ public KeyValue cloneRow(final long timestamp) { return new KeyValue(getBuffer(), getRowOffset(), getRowLength(), null, 0, 0, null, 0, 0, - timestamp, Type.codeToType(getType()), null, 0, 0); + timestamp, Type.Maximum, null, 0, 0); } /** @@ -946,17 +958,20 @@ } /** - * @return True if Delete KeyValue type. + * @return True if a delete type, a {@link KeyValue.Type#Delete} or + * a {KeyValue.Type#DeleteFamily} or a {@link KeyValue.Type.DeleteColumn} + * KeyValue type. */ - public boolean isDeleteType() { - return getType() == Type.Delete.code; + public boolean isDelete() { + int t = getType(); + return Type.Delete.getCode() <= t && t <= Type.DeleteFamily.getCode(); } /** - * @return True if DeleteColumn KeyValue type. + * @return True if this KV is a delete family type. */ - public boolean isDeleteColumnType() { - return getType() == Type.DeleteColumn.code; + public boolean isDeleteFamily() { + return getType() == Type.DeleteFamily.getCode(); } /** @@ -1659,6 +1674,21 @@ } /** + * Comparator that compares row component only of a KeyValue. + */ + public static class RowComparator implements Comparator { + final KVComparator comparator; + + public RowComparator(final KVComparator c) { + this.comparator = c; + } + + public int compare(KeyValue left, KeyValue right) { + return comparator.compareRows(left, right); + } + } + + /** * Compare key portion of a {@link KeyValue} for keys in .META. * table. */ Index: src/java/org/apache/hadoop/hbase/client/HConnectionManager.java =================================================================== --- src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy) @@ -577,7 +577,7 @@ REGIONINFO_QUALIFIER); if (value == null || value.length == 0) { throw new IOException("HRegionInfo was null or empty in " + - Bytes.toString(parentTable) + ", " + regionInfoRow); + Bytes.toString(parentTable) + ", row=" + regionInfoRow); } // convert the row result into the HRegionLocation we need! HRegionInfo regionInfo = (HRegionInfo) Writables.getWritable( Index: src/java/org/apache/hadoop/hbase/client/Result.java =================================================================== --- src/java/org/apache/hadoop/hbase/client/Result.java (revision 803512) +++ src/java/org/apache/hadoop/hbase/client/Result.java (working copy) @@ -499,4 +499,4 @@ } return results; } -} +} \ No newline at end of file