Index: src/test/org/apache/hadoop/hbase/HBaseTestCase.java =================================================================== --- src/test/org/apache/hadoop/hbase/HBaseTestCase.java (revision 803766) +++ src/test/org/apache/hadoop/hbase/HBaseTestCase.java (working copy) @@ -62,8 +62,7 @@ protected final static byte [] fam1 = Bytes.toBytes("colfamily1"); protected final static byte [] fam2 = Bytes.toBytes("colfamily2"); protected final static byte [] fam3 = Bytes.toBytes("colfamily3"); - protected static final byte [][] COLUMNS = {fam1, - fam2, fam3}; + protected static final byte [][] COLUMNS = {fam1, fam2, fam3}; private boolean localfs = false; protected Path testDir = null; Index: src/test/org/apache/hadoop/hbase/regionserver/TestGetClosestAtOrBefore.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestGetClosestAtOrBefore.java (revision 0) +++ src/test/org/apache/hadoop/hbase/regionserver/TestGetClosestAtOrBefore.java (revision 0) @@ -0,0 +1,233 @@ +/** + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; + +import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.TestGet; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hdfs.MiniDFSCluster; + +/** + * {@link TestGet} is a medley of tests of get all done up as a single test. + * This class + */ +public class TestGetClosestAtOrBefore extends HBaseTestCase implements HConstants { + private MiniDFSCluster miniHdfs; + + private static final byte [] T00 = Bytes.toBytes("000"); + private static final byte [] T10 = Bytes.toBytes("010"); + private static final byte [] T11 = Bytes.toBytes("011"); + private static final byte [] T12 = Bytes.toBytes("012"); + private static final byte [] T20 = Bytes.toBytes("020"); + private static final byte [] T30 = Bytes.toBytes("030"); + private static final byte [] T31 = Bytes.toBytes("031"); + private static final byte [] T35 = Bytes.toBytes("035"); + private static final byte [] T40 = Bytes.toBytes("040"); + + @Override + protected void setUp() throws Exception { + super.setUp(); + this.miniHdfs = new MiniDFSCluster(this.conf, 1, true, null); + // Set the hbase.rootdir to be the home directory in mini dfs. + this.conf.set(HConstants.HBASE_DIR, + this.miniHdfs.getFileSystem().getHomeDirectory().toString()); + } + + /** + * Test file of multiple deletes and with deletes as final key. 
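+   * Exercises {@link HRegion#getClosestRowBefore(byte[], byte[])} with puts and
+   * deletes spread across the memstore and flushed store files.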
+ * @see HBASE-751 + */ + public void testGetClosestRowBefore3() throws IOException{ + HRegion region = null; + byte [] c0 = COLUMNS[0]; + byte [] c1 = COLUMNS[1]; + try { + HTableDescriptor htd = createTableDescriptor(getName()); + region = createNewHRegion(htd, null, null); + + Put p = new Put(T00); + p.add(c0, c0, T00); + region.put(p); + + p = new Put(T10); + p.add(c0, c0, T10); + region.put(p); + + p = new Put(T20); + p.add(c0, c0, T20); + region.put(p); + + Result r = region.getClosestRowBefore(T20, c0); + assertTrue(Bytes.equals(T20, r.getRow())); + + Delete d = new Delete(T20); + d.deleteColumn(c0, c0); + region.delete(d, null, false); + + r = region.getClosestRowBefore(T20, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + p = new Put(T30); + p.add(c0, c0, T30); + region.put(p); + + r = region.getClosestRowBefore(T30, c0); + assertTrue(Bytes.equals(T30, r.getRow())); + + d = new Delete(T30); + d.deleteColumn(c0, c0); + region.delete(d, null, false); + + r = region.getClosestRowBefore(T30, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + r = region.getClosestRowBefore(T31, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + region.flushcache(); + + // try finding "010" after flush + r = region.getClosestRowBefore(T30, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + r = region.getClosestRowBefore(T31, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + // Put into a different column family. Should make it so I still get t10 + p = new Put(T20); + p.add(c1, c1, T20); + region.put(p); + + r = region.getClosestRowBefore(T30, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + r = region.getClosestRowBefore(T31, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + region.flushcache(); + + r = region.getClosestRowBefore(T30, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + r = region.getClosestRowBefore(T31, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + // Now try combo of memcache and mapfiles. Delete the t20 COLUMS[1] + // in memory; make sure we get back t10 again. + d = new Delete(T20); + d.deleteColumn(c1, c1); + region.delete(d, null, false); + r = region.getClosestRowBefore(T30, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + // Ask for a value off the end of the file. Should return t10. + r = region.getClosestRowBefore(T31, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + region.flushcache(); + r = region.getClosestRowBefore(T31, c0); + assertTrue(Bytes.equals(T10, r.getRow())); + + // Ok. Let the candidate come out of hfile but have delete of + // the candidate be in memory. 
+ p = new Put(T11); + p.add(c0, c0, T11); + region.put(p); + d = new Delete(T10); + d.deleteColumn(c1, c1); + r = region.getClosestRowBefore(T12, c0); + assertTrue(Bytes.equals(T11, r.getRow())); + } finally { + if (region != null) { + try { + region.close(); + } catch (Exception e) { + e.printStackTrace(); + } + region.getLog().closeAndDelete(); + } + } + } + + /** For HBASE-694 */ + public void testGetClosestRowBefore2() throws IOException{ + HRegion region = null; + byte [] c0 = COLUMNS[0]; + try { + HTableDescriptor htd = createTableDescriptor(getName()); + region = createNewHRegion(htd, null, null); + + Put p = new Put(T10); + p.add(c0, c0, T10); + region.put(p); + + p = new Put(T30); + p.add(c0, c0, T30); + region.put(p); + + p = new Put(T40); + p.add(c0, c0, T40); + region.put(p); + + // try finding "035" + Result r = region.getClosestRowBefore(T35, c0); + assertTrue(Bytes.equals(T30, r.getRow())); + + region.flushcache(); + + // try finding "035" + r = region.getClosestRowBefore(T35, c0); + assertTrue(Bytes.equals(T30, r.getRow())); + + p = new Put(T20); + p.add(c0, c0, T20); + region.put(p); + + // try finding "035" + r = region.getClosestRowBefore(T35, c0); + assertTrue(Bytes.equals(T30, r.getRow())); + + region.flushcache(); + + // try finding "035" + r = region.getClosestRowBefore(T35, c0); + assertTrue(Bytes.equals(T30, r.getRow())); + } finally { + if (region != null) { + try { + region.close(); + } catch (Exception e) { + e.printStackTrace(); + } + region.getLog().closeAndDelete(); + } + } + } + + @Override + protected void tearDown() throws Exception { + if (this.miniHdfs != null) { + this.miniHdfs.shutdown(); + } + super.tearDown(); + } +} \ No newline at end of file Index: src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java (revision 803766) +++ src/test/org/apache/hadoop/hbase/regionserver/TestMemStore.java (working copy) @@ -42,7 +42,7 @@ private final Log LOG = LogFactory.getLog(this.getClass()); private MemStore memstore; private static final int ROW_COUNT = 10; - private static final int QUALIFIER_COUNT = 10; + private static final int QUALIFIER_COUNT = ROW_COUNT; private static final byte [] FAMILY = Bytes.toBytes("column"); private static final byte [] CONTENTS_BASIC = Bytes.toBytes("contents:basic"); private static final String CONTENTSTR = "contentstr"; @@ -53,6 +53,62 @@ this.memstore = new MemStore(); } + /** For HBASE-528 */ + public void testGetRowKeyAtOrBefore() { + // set up some test data + byte [] t10 = Bytes.toBytes("010"); + byte [] t20 = Bytes.toBytes("020"); + byte [] t30 = Bytes.toBytes("030"); + byte [] t35 = Bytes.toBytes("035"); + byte [] t40 = Bytes.toBytes("040"); + + memstore.add(getKV(t10, "t10 bytes".getBytes())); + memstore.add(getKV(t20, "t20 bytes".getBytes())); + memstore.add(getKV(t30, "t30 bytes".getBytes())); + memstore.add(getKV(t35, "t35 bytes".getBytes())); + // write a delete in there to see if things still work ok + memstore.add(getDeleteKV(t35)); + memstore.add(getKV(t40, "t40 bytes".getBytes())); + + // try finding "015" + KeyValue t15kv = new KeyValue(Bytes.toBytes("015"), + System.currentTimeMillis()); + GetClosestRowBeforeTracker tracker = + new GetClosestRowBeforeTracker(KeyValue.COMPARATOR, t15kv, Integer.MAX_VALUE); + memstore.getRowKeyAtOrBefore(tracker); + assertTrue(KeyValue.COMPARATOR.compareRows(tracker.getCandidate(), t10) == 0); + + // try "020", we should get that row 
exactly + KeyValue t20kv = new KeyValue(t20, System.currentTimeMillis()); + tracker = + new GetClosestRowBeforeTracker(KeyValue.COMPARATOR, t20kv, Integer.MAX_VALUE); + memstore.getRowKeyAtOrBefore(tracker); + assertTrue(KeyValue.COMPARATOR.compareRows(tracker.getCandidate(), t20) == 0); + + // try "030", we should get that row exactly + KeyValue t30kv = new KeyValue(t30, System.currentTimeMillis()); + tracker = + new GetClosestRowBeforeTracker(KeyValue.COMPARATOR, t30kv, Integer.MAX_VALUE); + memstore.getRowKeyAtOrBefore(tracker); + assertTrue(KeyValue.COMPARATOR.compareRows(tracker.getCandidate(), t30) == 0); + + // try "038", should skip the deleted "035" and give "030" + KeyValue t38kv = new KeyValue(Bytes.toBytes("038"), + System.currentTimeMillis()); + tracker = + new GetClosestRowBeforeTracker(KeyValue.COMPARATOR, t38kv, Integer.MAX_VALUE); + memstore.getRowKeyAtOrBefore(tracker); + assertTrue(KeyValue.COMPARATOR.compareRows(tracker.getCandidate(), t30) == 0); + + // try "050", should get stuff from "040" + KeyValue t50kv = new KeyValue(Bytes.toBytes("050"), + System.currentTimeMillis()); + tracker = + new GetClosestRowBeforeTracker(KeyValue.COMPARATOR, t50kv, Integer.MAX_VALUE); + memstore.getRowKeyAtOrBefore(tracker); + assertTrue(KeyValue.COMPARATOR.compareRows(tracker.getCandidate(), t40) == 0); + } + public void testPutSameKey() { byte [] bytes = Bytes.toBytes(getName()); KeyValue kv = new KeyValue(bytes, bytes, bytes, bytes); @@ -82,6 +138,8 @@ while (s.next(result)) { LOG.info(result); count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); result.clear(); } } finally { @@ -98,6 +156,8 @@ // Assert the stuff is coming out in right order. assertTrue(Bytes.compareTo(Bytes.toBytes(count), result.get(0).getRow()) == 0); count++; + // Row count is same as column count. + assertEquals(rowCount, result.size()); if (count == 2) { this.memstore.snapshot(); LOG.info("Snapshotted"); @@ -108,6 +168,34 @@ s.close(); } assertEquals(rowCount, count); + // Assert that new values are seen in kvset as we scan. + long ts = System.currentTimeMillis(); + s = new StoreScanner(scan, null, HConstants.LATEST_TIMESTAMP, + this.memstore.comparator, null, memstorescanners); + count = 0; + int snapshotIndex = 5; + try { + while (s.next(result)) { + LOG.info(result); + // Assert the stuff is coming out in right order. + assertTrue(Bytes.compareTo(Bytes.toBytes(count), result.get(0).getRow()) == 0); + // Row count is same as column count. + // TODO PUTBACK assertEquals("count=" + count + ", result=" + result, + // rowCount, result.size()); + count++; + if (count == snapshotIndex) { + this.memstore.snapshot(); + this.memstore.clearSnapshot(this.memstore.getSnapshot()); + // Added more rows into kvset. 
+ addRows(this.memstore, ts); + LOG.info("Snapshotted, cleared it and then added values"); + } + result.clear(); + } + } finally { + s.close(); + } + assertEquals(rowCount, count); } /** @@ -126,7 +214,7 @@ } public void testMultipleVersionsSimple() throws Exception { - MemStore m = new MemStore(HConstants.FOREVER, KeyValue.COMPARATOR); + MemStore m = new MemStore(KeyValue.COMPARATOR); byte [] row = Bytes.toBytes("testRow"); byte [] family = Bytes.toBytes("testFamily"); byte [] qf = Bytes.toBytes("testQualifier"); @@ -146,7 +234,7 @@ } public void testBinary() throws IOException { - MemStore mc = new MemStore(HConstants.FOREVER, KeyValue.ROOT_COMPARATOR); + MemStore mc = new MemStore(KeyValue.ROOT_COMPARATOR); final int start = 43; final int end = 46; for (int k = start; k <= end; k++) { @@ -180,66 +268,7 @@ ////////////////////////////////////////////////////////////////////////////// // Get tests ////////////////////////////////////////////////////////////////////////////// - /** For HBASE-528 */ - public void testGetRowKeyAtOrBefore() { - // set up some test data - byte [] t10 = Bytes.toBytes("010"); - byte [] t20 = Bytes.toBytes("020"); - byte [] t30 = Bytes.toBytes("030"); - byte [] t35 = Bytes.toBytes("035"); - byte [] t40 = Bytes.toBytes("040"); - - memstore.add(getKV(t10, "t10 bytes".getBytes())); - memstore.add(getKV(t20, "t20 bytes".getBytes())); - memstore.add(getKV(t30, "t30 bytes".getBytes())); - memstore.add(getKV(t35, "t35 bytes".getBytes())); - // write a delete in there to see if things still work ok - memstore.add(getDeleteKV(t35)); - memstore.add(getKV(t40, "t40 bytes".getBytes())); - - NavigableSet results = null; - - // try finding "015" - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - KeyValue t15 = new KeyValue(Bytes.toBytes("015"), - System.currentTimeMillis()); - memstore.getRowKeyAtOrBefore(t15, results); - KeyValue kv = results.last(); - assertTrue(KeyValue.COMPARATOR.compareRows(kv, t10) == 0); - // try "020", we should get that row exactly - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - memstore.getRowKeyAtOrBefore(new KeyValue(t20, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t20) == 0); - - // try "030", we should get that row exactly - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - memstore.getRowKeyAtOrBefore(new KeyValue(t30, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t30) == 0); - - // try "038", should skip the deleted "035" and give "030" - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - byte [] t38 = Bytes.toBytes("038"); - memstore.getRowKeyAtOrBefore(new KeyValue(t38, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t30) == 0); - - // try "050", should get stuff from "040" - results = - new TreeSet(this.memstore.comparator.getComparatorIgnoringType()); - byte [] t50 = Bytes.toBytes("050"); - memstore.getRowKeyAtOrBefore(new KeyValue(t50, System.currentTimeMillis()), - results); - assertTrue(KeyValue.COMPARATOR.compareRows(results.last(), t40) == 0); - } - - /** Test getNextRow from memstore * @throws InterruptedException */ @@ -606,8 +635,19 @@ * @throws IOException */ private int addRows(final MemStore hmc) { + return addRows(hmc, HConstants.LATEST_TIMESTAMP); + } + + /** + * Adds {@link #ROW_COUNT} rows and {@link #COLUMNS_COUNT} + * @param hmc 
Instance to add rows to. + * @return How many rows we added. + * @throws IOException + */ + private int addRows(final MemStore hmc, final long ts) { for (int i = 0; i < ROW_COUNT; i++) { - long timestamp = System.currentTimeMillis(); + long timestamp = ts == HConstants.LATEST_TIMESTAMP? + System.currentTimeMillis(): ts; for (int ii = 0; ii < QUALIFIER_COUNT; ii++) { byte [] row = Bytes.toBytes(i); byte [] qf = makeQualifier(i, ii); Index: src/test/org/apache/hadoop/hbase/regionserver/TestHRegion.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestHRegion.java (revision 803766) +++ src/test/org/apache/hadoop/hbase/regionserver/TestHRegion.java (working copy) @@ -22,7 +22,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; -import java.util.Map; import java.util.TreeMap; import org.apache.commons.logging.Log; @@ -35,7 +34,6 @@ import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; Index: src/test/org/apache/hadoop/hbase/regionserver/TestStore.java =================================================================== --- src/test/org/apache/hadoop/hbase/regionserver/TestStore.java (revision 803766) +++ src/test/org/apache/hadoop/hbase/regionserver/TestStore.java (working copy) @@ -406,5 +406,4 @@ long storeTs = results.get(0).getTimestamp(); assertTrue(icvTs != storeTs); } - -} +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/regionserver/GetClosestRowBeforeTracker.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/GetClosestRowBeforeTracker.java (revision 0) +++ src/java/org/apache/hadoop/hbase/regionserver/GetClosestRowBeforeTracker.java (revision 0) @@ -0,0 +1,228 @@ +/* + * Copyright 2009 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.util.NavigableMap; +import java.util.NavigableSet; +import java.util.TreeMap; +import java.util.TreeSet; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KVComparator; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * State processing {@link HRegion#getClosestRowBefore(byte[], byte[])}. + * Like {@link GetDeleteTracker} and {@link ScanDeleteTracker} but does not + * implement the {@link DeleteTracker} interface since the below can span row. 
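+ * Tracks the best candidate KeyValue seen so far along with the deletes met
+ * on the way; callers construct it with the first-on-row target key, feed
+ * KeyValues through {@link #handle(KeyValue)}, and read the result back with
+ * {@link #getCandidate()}.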
+ */ +class GetClosestRowBeforeTracker { + private final long ts = System.currentTimeMillis(); + private final KeyValue targetkey; + private final long ttl; + private KeyValue candidate = null; + private final KeyValue.RowComparator rc; + private final KVComparator kvcomparator; + + // Deletes keyed row. Uses a comparator that compares on row portion of + // the KeyValue only. + private final NavigableMap> deletes; + + /** + * @param c + * @param kv Presume first on row: i.e. empty column and empty ts. + * @param ttl Time to live in ms. + */ + GetClosestRowBeforeTracker(final KVComparator c, final KeyValue kv, + final long ttl) { + super(); + this.targetkey = kv; + this.ttl = ttl; + this.kvcomparator = c; + this.rc = new KeyValue.RowComparator(this.kvcomparator); + this.deletes = new TreeMap>(this.rc); + } + + /** + * @param kv + * @return True if this kv is expired. + */ + boolean isExpired(final KeyValue kv) { + return kv.getTimestamp() < (this.ts - this.ttl); + } + + /* + * Add the specified KeyValue to the list of deletes. + * @param kv + */ + private void addDelete(final KeyValue kv) { + NavigableSet rowdeletes = this.deletes.get(kv); + if (rowdeletes == null) { + rowdeletes = new TreeSet(this.kvcomparator); + this.deletes.put(kv, rowdeletes); + } + rowdeletes.add(kv); + } + + /* + * @param kv Adds candidate if nearer the target than previous candidate. + * @return True if updated candidate. + */ + private boolean addCandidate(final KeyValue kv) { + if (!isDeleted(kv) && isBetterCandidate(kv)) { + this.candidate = kv; + return true; + } + return false; + } + + boolean isBetterCandidate(final KeyValue contender) { + return this.candidate == null || + (this.rc.compare(this.candidate, contender) < 0 && + this.rc.compare(contender, this.targetkey) <= 0); + } + + /* + * Check if the specified KeyValue buffer has been deleted by a previously + * seen delete. + * @param buffer KeyValue buffer + * @param qualifierOffset column qualifier offset + * @param qualifierLength column qualifier length + * @param timestamp timestamp + * @return true is the specified KeyValue is deleted, false if not + */ + private boolean isDeleted(final KeyValue kv) { + if (this.deletes.isEmpty()) return false; + NavigableSet rowdeletes = this.deletes.get(kv); + if (rowdeletes == null || rowdeletes.isEmpty()) return false; + return isDeleted(kv, rowdeletes); + } + + /** + * Check if the specified KeyValue buffer has been deleted by a previously + * seen delete. + * @param kv + * @param ds + * @return true is the specified KeyValue is deleted, false if not + */ + public boolean isDeleted(final KeyValue kv, final NavigableSet ds) { + if (deletes == null || deletes.isEmpty()) return false; + for (KeyValue d: ds) { + long kvts = kv.getTimestamp(); + long dts = d.getTimestamp(); + if (d.isDeleteFamily()) { + if (kvts <= dts) return true; + continue; + } + + // Check column + int ret = Bytes.compareTo(kv.getBuffer(), kv.getQualifierOffset(), kv.getQualifierLength(), + d.getBuffer(), d.getQualifierOffset(), d.getQualifierLength()); + if (ret <= -1) { + // This delete is for an earlier column. + continue; + } else if (ret >= 1) { + // Beyond this kv. + break; + } + // Check Timestamp + if (kvts > dts) return false; + + // Check Type + switch (KeyValue.Type.codeToType(d.getType())) { + case Delete: return kvts == dts; + case DeleteColumn: return true; + default: continue; + } + } + return false; + } + + /* + * Handle keys whose values hold deletes. 
+ * Add to the set of deletes and then if the candidate keys contain any that + * might match, then check for a match and remove it. Implies candidates + * is made with a Comparator that ignores key type. + * @param kv + * @return True if we removed k from candidates. + */ + boolean handleDeletes(final KeyValue kv) { + addDelete(kv); + boolean deleted = false; + if (!hasCandidate()) return deleted; + // We could encounter deletes when walking the memstore in reverse so check + // the just added delete doesn't overwrite the candidate? + if (isDeleted(this.candidate)) { + this.candidate = null; + deleted = true; + } + return deleted; + } + + /** + * Do right thing with passed key, add to deletes or add to candidates. + * @param kv + * @return True if we added a candidate + */ + boolean handle(final KeyValue kv) { + if (kv.isDelete()) { + handleDeletes(kv); + return false; + } + return addCandidate(kv); + } + + /** + * @return True if has candidate + */ + public boolean hasCandidate() { + return this.candidate != null; + } + + /** + * @return Best candidate or null. + */ + public KeyValue getCandidate() { + return this.candidate; + } + + public KeyValue getTargetKey() { + return this.targetkey; + } + + /** + * @param kv Current kv + * @param First on row kv. + * @param state + * @return True if we went too far, past the target key. + */ + boolean isTooFar(final KeyValue kv, final KeyValue firstOnRow) { + return this.kvcomparator.compareRows(kv, firstOnRow) > 0; + } + + /** + * @return True if a candidate AND it matches targetkey, the + * row we were asked to search for originally. + */ + boolean hasMatch() { + if (!hasCandidate()) return false; + return this.kvcomparator.compareRows(getCandidate(), this.targetkey) == 0; + } +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (revision 803766) +++ src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (working copy) @@ -145,7 +145,6 @@ * @return true if there are more rows, false if scanner is done */ public synchronized boolean next(List outResult) throws IOException { - List results = new ArrayList(); KeyValue peeked = this.heap.peek(); if (peeked == null) { close(); @@ -153,6 +152,7 @@ } matcher.setRow(peeked.getRow()); KeyValue kv; + List results = new ArrayList(); while((kv = this.heap.peek()) != null) { QueryMatcher.MatchCode qcode = matcher.match(kv); switch(qcode) { @@ -162,7 +162,6 @@ continue; case DONE: - // copy jazz outResult.addAll(results); return true; @@ -198,7 +197,6 @@ if (!results.isEmpty()) { // copy jazz outResult.addAll(results); - return true; } Index: src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java (revision 803766) +++ src/java/org/apache/hadoop/hbase/regionserver/GetDeleteTracker.java (working copy) @@ -38,8 +38,8 @@ * This class is NOT thread-safe as queries are never multi-threaded */ public class GetDeleteTracker implements DeleteTracker { - - private long familyStamp = -1L; + private static long UNSET = -1L; + private long familyStamp = UNSET; protected List deletes = null; private List newDeletes = new ArrayList(); private Iterator iterator; @@ -64,7 +64,7 @@ @Override public void add(byte [] buffer, int qualifierOffset, int qualifierLength, long timestamp, byte 
type) { - if(type == KeyValue.Type.DeleteFamily.getCode()) { + if (type == KeyValue.Type.DeleteFamily.getCode()) { if(timestamp > familyStamp) { familyStamp = timestamp; } @@ -88,14 +88,13 @@ @Override public boolean isDeleted(byte [] buffer, int qualifierOffset, int qualifierLength, long timestamp) { - // Check against DeleteFamily if (timestamp <= familyStamp) { return true; } // Check if there are other deletes - if(this.delete == null) { + if (this.delete == null) { return false; } @@ -103,7 +102,7 @@ int ret = Bytes.compareTo(buffer, qualifierOffset, qualifierLength, this.delete.buffer, this.delete.qualifierOffset, this.delete.qualifierLength); - if(ret <= -1) { + if (ret <= -1) { // Have not reached the next delete yet return false; } else if(ret >= 1) { @@ -149,7 +148,7 @@ @Override public boolean isEmpty() { - if(this.familyStamp == 0L && this.delete == null) { + if(this.familyStamp == UNSET && this.delete == null && this.newDeletes.isEmpty()) { return true; } return false; @@ -160,7 +159,7 @@ this.deletes = null; this.delete = null; this.newDeletes = new ArrayList(); - this.familyStamp = 0L; + this.familyStamp = UNSET; this.iterator = null; } @@ -173,7 +172,7 @@ @Override public void update() { // If no previous deletes, use new deletes and return - if(this.deletes == null || this.deletes.size() == 0) { + if (this.deletes == null || this.deletes.size() == 0) { finalize(this.newDeletes); return; } Index: src/java/org/apache/hadoop/hbase/regionserver/MemStore.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/MemStore.java (revision 803766) +++ src/java/org/apache/hadoop/hbase/regionserver/MemStore.java (working copy) @@ -29,13 +29,11 @@ import java.util.List; import java.util.NavigableSet; import java.util.SortedSet; -import java.util.TreeSet; import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.locks.ReentrantReadWriteLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.regionserver.DeleteCompare.DeleteCode; @@ -56,8 +54,6 @@ public class MemStore implements HeapSize { private static final Log LOG = LogFactory.getLog(MemStore.class); - private final long ttl; - // MemStore. Use a KeyValueSkipListSet rather than SkipListSet because of the // better semantics. The Map will overwrite if passed a key it already had // whereas the Set will not add new KV if key is same though value might be @@ -68,7 +64,7 @@ // Snapshot of memstore. Made for flusher. volatile KeyValueSkipListSet snapshot; - private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); final KeyValue.KVComparator comparator; @@ -85,7 +81,7 @@ * Default constructor. Used for tests. */ public MemStore() { - this(HConstants.FOREVER, KeyValue.COMPARATOR); + this(KeyValue.COMPARATOR); } /** @@ -93,8 +89,7 @@ * @param ttl The TTL for cache entries, in milliseconds. * @param c */ - public MemStore(final long ttl, final KeyValue.KVComparator c) { - this.ttl = ttl; + public MemStore(final KeyValue.KVComparator c) { this.comparator = c; this.comparatorIgnoreTimestamp = this.comparator.getComparatorIgnoringTimestamps(); @@ -184,15 +179,15 @@ * @return approximate size of the passed key and value. 
*/ long add(final KeyValue kv) { - long size = -1; + long s = -1; this.lock.readLock().lock(); try { - size = heapSizeChange(kv, this.kvset.add(kv)); - this.size.addAndGet(size); + s = heapSizeChange(kv, this.kvset.add(kv)); + this.size.addAndGet(s); } finally { this.lock.readLock().unlock(); } - return size; + return s; } /** @@ -201,7 +196,7 @@ * @return approximate size of the passed key and value. */ long delete(final KeyValue delete) { - long size = 0; + long s = 0; this.lock.readLock().lock(); //Have to find out what we want to do here, to find the fastest way of //removing things that are under a delete. @@ -261,17 +256,17 @@ //Delete all the entries effected by the last added delete for (KeyValue kv : deletes) { notpresent = this.kvset.remove(kv); - size -= heapSizeChange(kv, notpresent); + s -= heapSizeChange(kv, notpresent); } // Adding the delete to memstore. Add any value, as long as // same instance each time. - size += heapSizeChange(delete, this.kvset.add(delete)); + s += heapSizeChange(delete, this.kvset.add(delete)); } finally { this.lock.readLock().unlock(); } - this.size.addAndGet(size); - return size; + this.size.addAndGet(s); + return s; } /** @@ -325,200 +320,117 @@ return result; } - /** - * @param row Row to look for. - * @param candidateKeys Map of candidate keys (Accumulation over lots of - * lookup over stores and memstores) + * @param state */ - void getRowKeyAtOrBefore(final KeyValue row, - final NavigableSet candidateKeys) { - getRowKeyAtOrBefore(row, candidateKeys, - new TreeSet(this.comparator), System.currentTimeMillis()); - } - - /** - * @param kv Row to look for. - * @param candidates Map of candidate keys (Accumulation over lots of - * lookup over stores and memstores). Pass a Set with a Comparator that - * ignores key Type so we can do Set.remove using a delete, i.e. a KeyValue - * with a different Type to the candidate key. - * @param deletes Pass a Set that has a Comparator that ignores key type. - * @param now - */ - void getRowKeyAtOrBefore(final KeyValue kv, - final NavigableSet candidates, - final NavigableSet deletes, final long now) { + void getRowKeyAtOrBefore(final GetClosestRowBeforeTracker state) { this.lock.readLock().lock(); try { - getRowKeyAtOrBefore(kvset, kv, candidates, deletes, now); - getRowKeyAtOrBefore(snapshot, kv, candidates, deletes, now); + getRowKeyAtOrBefore(kvset, state); + getRowKeyAtOrBefore(snapshot, state); } finally { this.lock.readLock().unlock(); } } + /* + * @param set + * @param state Accumulates deletes and candidates. + */ private void getRowKeyAtOrBefore(final NavigableSet set, - final KeyValue kv, final NavigableSet candidates, - final NavigableSet deletes, final long now) { + final GetClosestRowBeforeTracker state) { if (set.isEmpty()) { return; } - // We want the earliest possible to start searching from. Start before - // the candidate key in case it turns out a delete came in later. - KeyValue search = candidates.isEmpty()? kv: candidates.first(); + if (!walkForwardInSingleRow(set, state.getTargetKey(), state)) { + // Found nothing in row. Try backing up. 
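+      // Backing up walks to earlier rows until one of them yields a candidate
+      // or we fall out of the better-candidate range.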
+ getRowKeyBefore(set, state); + } + } - // Get all the entries that come equal or after our search key - SortedSet tail = set.tailSet(search); - - // if there are items in the tail map, there's either a direct match to - // the search key, or a range of values between the first candidate key - // and the ultimate search key (or the end of the cache) - if (!tail.isEmpty() && - this.comparator.compareRows(tail.first(), search) <= 0) { - // Keep looking at cells as long as they are no greater than the - // ultimate search key and there's still records left in the map. - KeyValue deleted = null; - KeyValue found = null; - for (Iterator iterator = tail.iterator(); - iterator.hasNext() && (found == null || - this.comparator.compareRows(found, kv) <= 0);) { - found = iterator.next(); - if (this.comparator.compareRows(found, kv) <= 0) { - if (found.isDeleteType()) { - Store.handleDeletes(found, candidates, deletes); - if (deleted == null) { - deleted = found; - } - } else { - if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) { - candidates.add(found); - } else { - if (deleted == null) { - deleted = found; - } - // TODO: Check this removes the right key. - // Its expired. Remove it. - iterator.remove(); - } - } - } + /* + * @param set + * @param firstOnRow First possible key on this row. + * @param state + * @return True if we found a candidate walking this row. + */ + private boolean walkForwardInSingleRow(final SortedSet set, + final KeyValue firstOnRow, final GetClosestRowBeforeTracker state) { + boolean foundCandidate = false; + // Target key is first key on a row -- no column and latest timestamp. + SortedSet tail = set.tailSet(firstOnRow); + if (tail.isEmpty()) return foundCandidate; + for (Iterator i = tail.iterator(); i.hasNext();) { + KeyValue kv = i.next(); + // Did we go beyond the target row? If so break. + if (state.isTooFar(kv, firstOnRow)) break; + if (state.isExpired(kv)) { + i.remove(); + continue; } - if (candidates.isEmpty() && deleted != null) { - getRowKeyBefore(set, deleted, candidates, deletes, now); + // If we added something, this row is a contender. break. + if (state.handle(kv)) { + foundCandidate = true; + break; } - } else { - // The tail didn't contain any keys that matched our criteria, or was - // empty. Examine all the keys that proceed our splitting point. - getRowKeyBefore(set, search, candidates, deletes, now); } + return foundCandidate; } /* - * Get row key that comes before passed search_key - * Use when we know search_key is not in the map and we need to search - * earlier in the cache. + * Walk backwards through the passed set. * @param set - * @param search - * @param candidates - * @param deletes Pass a Set that has a Comparator that ignores key type. - * @param now + * @param state */ private void getRowKeyBefore(NavigableSet set, - KeyValue search, NavigableSet candidates, - final NavigableSet deletes, final long now) { - NavigableSet head = set.headSet(search, false); - // If we tried to create a headMap and got an empty map, then there are - // no keys at or before the search key, so we're done. - if (head.isEmpty()) { - return; + final GetClosestRowBeforeTracker state) { + // Presume state.getTargetKey is first on row. + KeyValue firstOnRow = state.getTargetKey(); + for (Member p = memberOfPreviousRow(set, state, firstOnRow); + p != null; p = memberOfPreviousRow(p.set, state, firstOnRow)) { + // Stop looking if we're not in the better candidate range. 
+ if (!state.isBetterCandidate(p.kv)) break; + // Make into firstOnRow + firstOnRow = p.kv.cloneRow(); + // If we find something, break; + if (walkForwardInSingleRow(p.set, firstOnRow, state)) break; } + } - // If there aren't any candidate keys at this point, we need to search - // backwards until we find at least one candidate or run out of headMap. - if (candidates.isEmpty()) { - KeyValue lastFound = null; - // TODO: Confirm we're iterating in the right order - for (Iterator i = head.descendingIterator(); - i.hasNext();) { - KeyValue found = i.next(); - // if the last row we found a candidate key for is different than - // the row of the current candidate, we can stop looking -- if its - // not a delete record. - boolean deleted = found.isDeleteType(); - if (lastFound != null && - this.comparator.matchingRows(lastFound, found) && !deleted) { - break; - } - // If this isn't a delete, record it as a candidate key. Also - // take note of this candidate so that we'll know when - // we cross the row boundary into the previous row. - if (!deleted) { - if (Store.notExpiredAndNotInDeletes(this.ttl, found, now, deletes)) { - lastFound = found; - candidates.add(found); - } else { - // Its expired. - Store.expiredOrDeleted(set, found); - } - } else { - // We are encountering items in reverse. We may have just added - // an item to candidates that this later item deletes. Check. If we - // found something in candidates, remove it from the set. - if (Store.handleDeletes(found, candidates, deletes)) { - remove(set, found); - } - } - } - } else { - // If there are already some candidate keys, we only need to consider - // the very last row's worth of keys in the headMap, because any - // smaller acceptable candidate keys would have caused us to start - // our search earlier in the list, and we wouldn't be searching here. - SortedSet rowTail = - head.tailSet(head.last().cloneRow(HConstants.LATEST_TIMESTAMP)); - Iterator i = rowTail.iterator(); - do { - KeyValue found = i.next(); - if (found.isDeleteType()) { - Store.handleDeletes(found, candidates, deletes); - } else { - if (ttl == HConstants.FOREVER || - now < found.getTimestamp() + ttl || - !deletes.contains(found)) { - candidates.add(found); - } else { - Store.expiredOrDeleted(set, found); - } - } - } while (i.hasNext()); + /* + * Immutable data structure to hold member found and set it was found in. + * Include set because it is carrying context. + */ + private class Member { + final KeyValue kv; + final NavigableSet set; + Member(final NavigableSet s, final KeyValue kv) { + this.kv = kv; + this.set = s; } } - /* - * @param set - * @param delete This is a delete record. Remove anything behind this of same - * r/c/ts. - * @return True if we removed anything. + * @param set Set to walk back in. Pass a first in row or we'll return + * same row (loop). + * @param state Utility + * @param firstOnRow First item on the row after the one we want to find a + * member in. + * @return Null or member of row previous to firstOnRow */ - private boolean remove(final NavigableSet set, - final KeyValue delete) { - SortedSet s = set.tailSet(delete); - if (s.isEmpty()) { - return false; - } - boolean removed = false; - for (KeyValue kv: s) { - if (this.comparatorIgnoreType.compare(kv, delete) == 0) { - // Same r/c/ts. Remove it. 
- s.remove(kv); - removed = true; + private Member memberOfPreviousRow(NavigableSet set, + final GetClosestRowBeforeTracker state, final KeyValue firstOnRow) { + NavigableSet head = set.headSet(firstOnRow, false); + if (head.isEmpty()) return null; + for (Iterator i = head.descendingIterator(); i.hasNext();) { + KeyValue found = i.next(); + if (state.isExpired(found)) { + i.remove(); continue; } - break; + return new Member(head, found); } - return removed; + return null; } /** @@ -527,9 +439,8 @@ KeyValueScanner [] getScanners() { this.lock.readLock().lock(); try { - KeyValueScanner [] scanners = new KeyValueScanner[2]; - scanners[0] = new MemStoreScanner(this.kvset); - scanners[1] = new MemStoreScanner(this.snapshot); + KeyValueScanner [] scanners = new KeyValueScanner[1]; + scanners[0] = new MemStoreScanner(); return scanners; } finally { this.lock.readLock().unlock(); @@ -603,18 +514,17 @@ /* * MemStoreScanner implements the KeyValueScanner. - * It lets the caller scan the contents of a memstore. - * This behaves as if it were a real scanner but does not maintain position - * in the passed memstore tree. + * It lets the caller scan the contents of a memstore -- both current + * map and snapshot. + * This behaves as if it were a real scanner but does not maintain position. */ protected class MemStoreScanner implements KeyValueScanner { - private final NavigableSet kvs; - private KeyValue current = null; private List result = new ArrayList(); private int idx = 0; + private KeyValue firstOnNextRow = null; - MemStoreScanner(final NavigableSet s) { - this.kvs = s; + MemStoreScanner() { + super(); } public boolean seek(KeyValue key) { @@ -623,7 +533,7 @@ close(); return false; } - this.current = key; + this.firstOnNextRow = key; return cacheNextRow(); } catch(Exception e) { close(); @@ -652,35 +562,66 @@ } /** - * @return True if we successfully cached a NavigableSet aligned on - * next row. + * @return True if successfully cached a next row. */ boolean cacheNextRow() { - SortedSet keys; + this.result.clear(); + this.idx = 0; + // Prevent snapshot being cleared while caching a row. + lock.readLock().lock(); try { - keys = this.kvs.tailSet(this.current); - } catch (Exception e) { - close(); - return false; + // Look at each set, kvset and snapshot. + // Both look for matching entries for this.current row returning what + // they + // have as next row after this.current (or null if nothing in set or if + // nothing follows. + KeyValue kvsetNextRow = cacheNextRow(kvset); + KeyValue snapshotNextRow = cacheNextRow(snapshot); + if (kvsetNextRow == null && snapshotNextRow == null) { + // Nothing more in memstore but we might have gotten current row + // results + // Indicate at end of store by setting next row to null. + this.firstOnNextRow = null; + return !this.result.isEmpty(); + } else if (kvsetNextRow != null && snapshotNextRow != null) { + // Set current at the lowest of the two values. + int compare = comparator.compare(kvsetNextRow, snapshotNextRow); + this.firstOnNextRow = (compare <= 0) ? kvsetNextRow : snapshotNextRow; + } else { + this.firstOnNextRow = kvsetNextRow != null ? kvsetNextRow + : snapshotNextRow; + } + return true; + } finally { + lock.readLock().unlock(); } - if (keys == null || keys.isEmpty()) { - close(); - return false; - } - this.current = null; - byte [] row = keys.first().getRow(); - for (KeyValue kv: keys) { - if (comparator.compareRows(kv, row) != 0) { - this.current = kv; + } + + /* + * See if set has entries for the this.current row. 
If so, + * add them to this.result. + * @param set Set to examine + * @return Next row in passed set or null if nothing in this + * passed set + */ + private KeyValue cacheNextRow(final NavigableSet set) { + if (this.firstOnNextRow == null || set.isEmpty()) return null; + SortedSet tail = set.tailSet(this.firstOnNextRow); + if (tail == null || tail.isEmpty()) return null; + KeyValue first = tail.first(); + KeyValue nextRow = null; + for (KeyValue kv: tail) { + if (comparator.compareRows(first, kv) != 0) { + nextRow = kv; break; } - result.add(kv); + this.result.add(kv); } - return true; + return nextRow; } public void close() { - current = null; + firstOnNextRow = null; idx = 0; if (!result.isEmpty()) { result.clear(); @@ -764,4 +705,4 @@ } LOG.info("Exiting."); } -} +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/regionserver/HRegion.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (revision 803766) +++ src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (working copy) @@ -1022,9 +1022,8 @@ * @return map of values * @throws IOException */ - public Result getClosestRowBefore(final byte [] row, - final byte [] family) - throws IOException{ + public Result getClosestRowBefore(final byte [] row, final byte [] family) + throws IOException { // look across all the HStores for this region and determine what the // closest key is across all column families, since the data may be sparse KeyValue key = null; @@ -1039,14 +1038,13 @@ return null; } List results = new ArrayList(); - // This will get all results for this store. TODO: Do I have to make a - // new key? + // This will get all results for this store. if (!this.comparator.matchingRows(kv, key)) { kv = new KeyValue(key.getRow(), HConstants.LATEST_TIMESTAMP); } Get get = new Get(key.getRow()); store.get(get, null, results); - + LOG.info("GETCLOSEST: " + kv.toString() + " " + key.toString()); return new Result(results); } finally { splitsAndClosesLock.readLock().unlock(); @@ -1161,7 +1159,7 @@ for (KeyValue kv: kvs) { // Check if time is LATEST, change to time of most recent addition if so // This is expensive. 
- if (kv.isLatestTimestamp() && kv.isDeleteType()) { + if (kv.isLatestTimestamp() && kv.isDelete()) { List result = new ArrayList(1); Get g = new Get(kv.getRow()); NavigableSet qualifiers = @@ -2430,6 +2428,7 @@ if (majorCompact) { region.compactStores(true); } else { + /* // Default behavior Scan scan = new Scan(); InternalScanner scanner = region.getScanner(scan); @@ -2443,7 +2442,8 @@ } while (done); } finally { scanner.close(); - } + }*/ + System.out.println(region.getClosestRowBefore(Bytes.toBytes("GeneratedCSVContent2,844EE3A3A260566C6A10D027C92D3796,9993372036854775807"))); } } finally { region.close(); @@ -2481,7 +2481,6 @@ printUsageAndExit("ERROR: Unrecognized option <" + args[1] + ">"); } majorCompact = true; - } Path tableDir = new Path(args[0]); HBaseConfiguration c = new HBaseConfiguration(); @@ -2497,4 +2496,4 @@ if (bc != null) bc.shutdown(); } } -} +} \ No newline at end of file Index: src/java/org/apache/hadoop/hbase/regionserver/Store.java =================================================================== --- src/java/org/apache/hadoop/hbase/regionserver/Store.java (revision 803766) +++ src/java/org/apache/hadoop/hbase/regionserver/Store.java (working copy) @@ -25,11 +25,13 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NavigableMap; import java.util.NavigableSet; import java.util.Set; +import java.util.SortedSet; import java.util.TreeSet; import java.util.concurrent.ConcurrentSkipListMap; import java.util.concurrent.CopyOnWriteArraySet; @@ -181,7 +183,7 @@ // second -> ms adjust for user data this.ttl *= 1000; } - this.memstore = new MemStore(this.ttl, this.comparator); + this.memstore = new MemStore(this.comparator); this.regionCompactionDir = new Path(HRegion.getCompactionDir(basedir), Integer.toString(info.getEncodedName())); this.storeName = this.family.getName(); @@ -1028,50 +1030,44 @@ } } + static boolean isExpired(final KeyValue key, final long oldestTimestamp) { + return key.getTimestamp() < oldestTimestamp; + } + /** * Find the key that matches row exactly, or the one that immediately * preceeds it. WARNING: Only use this method on a table where writes occur - * with stricly increasing timestamps. This method assumes this pattern of - * writes in order to make it reasonably performant. - * @param targetkey - * @return Found keyvalue + * with strictly increasing timestamps. This method assumes this pattern of + * writes in order to make it reasonably performant. Also dependent on the + * axiom that deletes are for cells that are in the container that follows + * whether a memstore snapshot or a storefile, not for the current container: + * i.e. we'll see deletes before we come across cells we are to delete. + * Presumption is that the memstore#kvset is processed before memstore#snapshot + * and so on. + * @param kv First possible item on targeted row. + * @return Found keyvalue or null if none found. * @throws IOException */ - KeyValue getRowKeyAtOrBefore(final KeyValue targetkey) - throws IOException{ - // Map of keys that are candidates for holding the row key that - // most closely matches what we're looking for. We'll have to update it as - // deletes are found all over the place as we go along before finally - // reading the best key out of it at the end. Use a comparator that - // ignores key types. Otherwise, we can't remove deleted items doing - // set.remove because of the differing type between insert and delete. 
- NavigableSet candidates = - new TreeSet(this.comparator.getComparatorIgnoringType()); - - // Keep a list of deleted cell keys. We need this because as we go through - // the store files, the cell with the delete marker may be in one file and - // the old non-delete cell value in a later store file. If we don't keep - // around the fact that the cell was deleted in a newer record, we end up - // returning the old value if user is asking for more than one version. - // This List of deletes should not be large since we are only keeping rows - // and columns that match those set on the scanner and which have delete - // values. If memory usage becomes an issue, could redo as bloom filter. - NavigableSet deletes = - new TreeSet(this.comparatorIgnoringType); - long now = System.currentTimeMillis(); + KeyValue getRowKeyAtOrBefore(final KeyValue kv) + throws IOException { + GetClosestRowBeforeTracker state = + new GetClosestRowBeforeTracker(this.comparator, kv, this.ttl); this.lock.readLock().lock(); try { // First go to the memstore. Pick up deletes and candidates. - this.memstore.getRowKeyAtOrBefore(targetkey, candidates, deletes, now); - // Process each store file. Run through from newest to oldest. - Map m = this.storefiles.descendingMap(); - for (Map.Entry e: m.entrySet()) { - // Update the candidate keys from the current map file - rowAtOrBeforeFromStoreFile(e.getValue(), targetkey, candidates, - deletes, now); + this.memstore.getRowKeyAtOrBefore(state); + // Check if match, if we got a candidate on the asked for row 'kv'. + if (!state.hasMatch()) { + // Process each store file. Run through from newest to oldest. + Map m = this.storefiles.descendingMap(); + for (Map.Entry e: m.entrySet()) { + // Update the candidate keys from the current map file + rowAtOrBeforeFromStoreFile(e.getValue(), state); + // Early out if we can. + if (state.hasMatch()) break; + } } - // Return the best key from candidateKeys - return candidates.isEmpty()? null: candidates.last(); + return state.getCandidate(); } finally { this.lock.readLock().unlock(); } @@ -1081,243 +1077,97 @@ * Check an individual MapFile for the row at or before a given key * and timestamp * @param f - * @param targetkey - * @param candidates Pass a Set with a Comparator that - * ignores key Type so we can do Set.remove using a delete, i.e. a KeyValue - * with a different Type to the candidate key. + * @param state * @throws IOException */ private void rowAtOrBeforeFromStoreFile(final StoreFile f, - final KeyValue targetkey, final NavigableSet candidates, - final NavigableSet deletes, final long now) + final GetClosestRowBeforeTracker state) throws IOException { - // if there aren't any candidate keys yet, we'll do some things different - if (candidates.isEmpty()) { - rowAtOrBeforeCandidate(f, targetkey, candidates, deletes, now); - } else { - rowAtOrBeforeWithCandidates(f, targetkey, candidates, deletes, now); - } - } - - /* - * @param ttlSetting - * @param hsk - * @param now - * @param deletes A Set whose Comparator ignores Type. - * @return True if key has not expired and is not in passed set of deletes. 
- */ - static boolean notExpiredAndNotInDeletes(final long ttl, - final KeyValue key, final long now, final Set deletes) { - return !isExpired(key, now-ttl) && (deletes == null || deletes.isEmpty() || - !deletes.contains(key)); - } - - static boolean isExpired(final KeyValue key, final long oldestTimestamp) { - return key.getTimestamp() < oldestTimestamp; - } - - /* Find a candidate for row that is at or before passed key, searchkey, in hfile. - * @param f - * @param targetkey Key to go search the hfile with. - * @param candidates - * @param now - * @throws IOException - * @see {@link #rowAtOrBeforeCandidate(HStoreKey, org.apache.hadoop.io.MapFile.Reader, byte[], SortedMap, long)} - */ - private void rowAtOrBeforeCandidate(final StoreFile f, - final KeyValue targetkey, final NavigableSet candidates, - final NavigableSet deletes, final long now) - throws IOException { - KeyValue search = targetkey; - // If the row we're looking for is past the end of this mapfile, set the - // search key to be the last key. If its a deleted key, then we'll back - // up to the row before and return that. - // TODO: Cache last key as KV over in the file. Reader r = f.getReader(); if (r == null) { LOG.warn("StoreFile " + f + " has a null Reader"); return; } - byte [] lastkey = r.getLastKey(); - KeyValue lastKeyValue = - KeyValue.createKeyValueFromKey(lastkey, 0, lastkey.length); - if (this.comparator.compareRows(lastKeyValue, targetkey) < 0) { - search = lastKeyValue; + byte [] fk = r.getFirstKey(); + KeyValue firstKV = KeyValue.createKeyValueFromKey(fk, 0, fk.length); + byte [] lk = r.getLastKey(); + KeyValue lastKV = KeyValue.createKeyValueFromKey(lk, 0, lk.length); + // If the row we're looking for is past the end of file, set search key to + // last key. TODO: Cache last and first key rather than make each time. + KeyValue firstOnRow = state.getTargetKey(); + if (this.comparator.compareRows(lastKV, firstOnRow) < 0) { + firstOnRow = lastKV.cloneRow(); } - KeyValue knownNoGoodKey = null; HFileScanner scanner = r.getScanner(); - for (boolean foundCandidate = false; !foundCandidate;) { - // Seek to the exact row, or the one that would be immediately before it - int result = scanner.seekTo(search.getBuffer(), search.getKeyOffset(), - search.getKeyLength()); - if (result < 0) { - // Not in file. - break; - } - KeyValue deletedOrExpiredRow = null; - KeyValue kv = null; - do { - kv = scanner.getKeyValue(); - if (this.comparator.compareRows(kv, search) <= 0) { - if (!kv.isDeleteType()) { - if (handleNonDelete(kv, now, deletes, candidates)) { - foundCandidate = true; - // NOTE! Continue. - continue; - } - } - deletes.add(kv); - if (deletedOrExpiredRow == null) { - deletedOrExpiredRow = kv; - } - } else if (this.comparator.compareRows(kv, search) > 0) { - // if the row key we just read is beyond the key we're searching for, - // then we're done. - break; - } else { - // So, the row key doesn't match, but we haven't gone past the row - // we're seeking yet, so this row is a candidate for closest - // (assuming that it isn't a delete). 
- if (!kv.isDeleteType()) { - if (handleNonDelete(kv, now, deletes, candidates)) { - foundCandidate = true; - // NOTE: Continue - continue; - } - } - deletes.add(kv); - if (deletedOrExpiredRow == null) { - deletedOrExpiredRow = kv; - } - } - } while(scanner.next() && (knownNoGoodKey == null || - this.comparator.compare(kv, knownNoGoodKey) < 0)); - - // If we get here and have no candidates but we did find a deleted or - // expired candidate, we need to look at the key before that - if (!foundCandidate && deletedOrExpiredRow != null) { - knownNoGoodKey = deletedOrExpiredRow; - if (!scanner.seekBefore(deletedOrExpiredRow.getBuffer(), - deletedOrExpiredRow.getKeyOffset(), - deletedOrExpiredRow.getKeyLength())) { - // Not in file -- what can I do now but break? - break; - } - search = scanner.getKeyValue(); - } else { - // No candidates and no deleted or expired candidates. Give up. - break; - } + // Seek scanner. If can't seek it, return. + if (!seekToScanner(scanner, firstOnRow, firstKV)) return; + // If we found candidate on firstOnRow, just return. + if (walkForwardInSingleRow(scanner, firstOnRow, state)) return; + // If here, need to start backing up. + while (scanner.seekBefore(firstOnRow.getBuffer(), firstOnRow.getKeyOffset(), + firstOnRow.getKeyLength())) { + KeyValue kv = scanner.getKeyValue(); + if (!state.isBetterCandidate(kv)) break; + // Make new first on row. + firstOnRow = kv.cloneRow(); + // Seek scanner. If can't seek it, break. + if (!seekToScanner(scanner, firstOnRow, firstKV)) break; + // If we find something, break; + if (walkForwardInSingleRow(scanner, firstOnRow, state)) break; } - - // Arriving here just means that we consumed the whole rest of the map - // without going "past" the key we're searching for. we can just fall - // through here. } - private void rowAtOrBeforeWithCandidates(final StoreFile f, - final KeyValue targetkey, - final NavigableSet candidates, - final NavigableSet deletes, final long now) + /** + * @param scanner + * @param firstOnRow + * @param firstKV + * @return True if we successfully seeked scanner. + * @throws IOException + */ + private boolean seekToScanner(final HFileScanner scanner, + final KeyValue firstOnRow, final KeyValue firstKV) throws IOException { - // if there are already candidate keys, we need to start our search - // at the earliest possible key so that we can discover any possible - // deletes for keys between the start and the search key. Back up to start - // of the row in case there are deletes for this candidate in this mapfile - // BUT do not backup before the first key in the store file. - KeyValue firstCandidateKey = candidates.first(); - KeyValue search = null; - if (this.comparator.compareRows(firstCandidateKey, targetkey) < 0) { - search = targetkey; - } else { - search = firstCandidateKey; - } + KeyValue kv = firstOnRow; + // If firstOnRow < firstKV, set to firstKV + if (this.comparator.compareRows(firstKV, firstOnRow) == 0) kv = firstKV; + int result = scanner.seekTo(kv.getBuffer(), kv.getKeyOffset(), + kv.getKeyLength()); + return result >= 0; + } - // Seek to the exact row, or the one that would be immediately before it - Reader r = f.getReader(); - if (r == null) { - LOG.warn("StoreFile " + f + " has a null Reader"); - return; - } - HFileScanner scanner = r.getScanner(); - int result = scanner.seekTo(search.getBuffer(), search.getKeyOffset(), - search.getKeyLength()); - if (result < 0) { - // Key is before start of this file. Return. 
-      return;
-    }
+  /*
+   * When we come in here, we are probably at the kv just before we break into
+   * the row that firstOnRow is on. Need to increment one time to get on to
+   * the row we are interested in.
+   * @param scanner
+   * @param firstOnRow
+   * @param state
+   * @return True we found a candidate.
+   * @throws IOException
+   */
+  private boolean walkForwardInSingleRow(final HFileScanner scanner,
+    final KeyValue firstOnRow, final GetClosestRowBeforeTracker state)
+  throws IOException {
+    boolean foundCandidate = false;
     do {
       KeyValue kv = scanner.getKeyValue();
-      // if we have an exact match on row, and it's not a delete, save this
-      // as a candidate key
-      if (this.comparator.matchingRows(kv, targetkey)) {
-        handleKey(kv, now, deletes, candidates);
-      } else if (this.comparator.compareRows(kv, targetkey) > 0 ) {
-        // if the row key we just read is beyond the key we're searching for,
-        // then we're done.
+      // If we are not in the row, skip.
+      if (this.comparator.compareRows(kv, firstOnRow) < 0) continue;
+      // Did we go beyond the target row? If so break.
+      if (state.isTooFar(kv, firstOnRow)) break;
+      if (state.isExpired(kv)) {
+        continue;
+      }
+      // If we added something, this row is a contender. break.
+      if (state.handle(kv)) {
+        foundCandidate = true;
         break;
-      } else {
-        // So, the row key doesn't match, but we haven't gone past the row
-        // we're seeking yet, so this row is a candidate for closest
-        // (assuming that it isn't a delete).
-        handleKey(kv, now, deletes, candidates);
       }
     } while(scanner.next());
+    return foundCandidate;
   }
 
-  /*
-   * Used calculating keys at or just before a passed key.
-   * @param readkey
-   * @param now
-   * @param deletes Set with Comparator that ignores key type.
-   * @param candidate Set with Comprator that ignores key type.
-   */
-  private void handleKey(final KeyValue readkey, final long now,
-    final NavigableSet deletes,
-    final NavigableSet candidates) {
-    if (!readkey.isDeleteType()) {
-      handleNonDelete(readkey, now, deletes, candidates);
-    } else {
-      handleDeletes(readkey, candidates, deletes);
-    }
-  }
-
-  /*
-   * Used calculating keys at or just before a passed key.
-   * @param readkey
-   * @param now
-   * @param deletes Set with Comparator that ignores key type.
-   * @param candidates Set with Comparator that ignores key type.
-   * @return True if we added a candidate.
-   */
-  private boolean handleNonDelete(final KeyValue readkey, final long now,
-    final NavigableSet deletes,
-    final NavigableSet candidates) {
-    if (notExpiredAndNotInDeletes(this.ttl, readkey, now, deletes)) {
-      candidates.add(readkey);
-      return true;
-    }
-    return false;
-  }
-  /**
-   * Handle keys whose values hold deletes.
-   * Add to the set of deletes and then if the candidate keys contain any that
-   * might match, then check for a match and remove it. Implies candidates
-   * is made with a Comparator that ignores key type.
-   * @param k
-   * @param candidates
-   * @param deletes
-   * @return True if we removed k from candidates.
-   */
-  static boolean handleDeletes(final KeyValue k,
-      final NavigableSet candidates,
-      final NavigableSet deletes) {
-    deletes.add(k);
-    return candidates.remove(k);
-  }
-
   /**
    * Determines if HStore can be split
    * @param force Whether to force a split or not.
    * @return a StoreSize if store can be split, null otherwise.
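Illustration only; not part of the patch. The rewritten lookup above seeks to the row of interest, scans forward inside that row with walkForwardInSingleRow(), and whenever the row turns out to be deleted or expired it backs up one row at a time with scanner.seekBefore() and tries again. Below is a minimal standalone sketch of that backing-up idea written against java.util.TreeMap instead of the real HFileScanner/GetClosestRowBeforeTracker types; the class name, method and row keys are invented, and a null value stands in for a deleted row.

// Illustration only; not part of the patch. Sketches the "seek, walk the row,
// back up a row at a time" shape of rowAtOrBeforeCandidate() above, using a
// TreeMap in place of an HFileScanner so it runs standalone.
import java.util.Map;
import java.util.NavigableMap;
import java.util.TreeMap;

public class ClosestRowBeforeSketch {
  static String closestRowBefore(final NavigableMap<String, String> rows,
      final String target) {
    // Closest row at or before target (the initial seek).
    Map.Entry<String, String> e = rows.floorEntry(target);
    // Back up past deleted rows, like the scanner.seekBefore() loop above.
    while (e != null && e.getValue() == null) {
      e = rows.lowerEntry(e.getKey());
    }
    return e == null ? null : e.getKey();
  }

  public static void main(String[] args) {
    NavigableMap<String, String> rows = new TreeMap<String, String>();
    rows.put("row-a", "value");
    rows.put("row-b", "value");
    rows.put("row-c", null);   // stands in for a deleted row
    System.out.println(closestRowBefore(rows, "row-d"));  // prints row-b
  }
}

In the patch itself, GetClosestRowBeforeTracker plays the bookkeeping role of the null values here, deciding via isExpired(), isTooFar() and isBetterCandidate() whether a key can still count as the closest row at or before the target.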
Index: src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java
===================================================================
--- src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (revision 803766)
+++ src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (working copy)
@@ -230,7 +230,7 @@
   public boolean filterRow(List kvs) {
     for (KeyValue kv: kvs) {
       byte [] column = kv.getColumn();
-      if (nullColumns.contains(column) && !kv.isDeleteType()) {
+      if (nullColumns.contains(column) && !kv.isDelete()) {
         return true;
       }
       if (!equalsMap.containsKey(column)) {
Index: src/java/org/apache/hadoop/hbase/KeyValue.java
===================================================================
--- src/java/org/apache/hadoop/hbase/KeyValue.java (revision 803766)
+++ src/java/org/apache/hadoop/hbase/KeyValue.java (working copy)
@@ -22,6 +22,7 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.util.Comparator;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -128,7 +129,7 @@
       return compare(a, 0, a.length, b, 0, b.length);
     }
   };
-  
+
   /**
    * Get the appropriate row comparator for the specified table.
    *
@@ -291,11 +292,12 @@
 
   /**
    * Constructs KeyValue structure filled with null value.
+   * Sets type to {@link KeyValue.Type#Maximum}
   * @param row - row key (arbitrary byte array)
   * @param timestamp
   */
  public KeyValue(final byte [] row, final long timestamp) {
-    this(row, timestamp, Type.Put);
+    this(row, timestamp, Type.Maximum);
  }
 
  /**
@@ -309,13 +311,14 @@
 
  /**
   * Constructs KeyValue structure filled with null value.
+   * Sets type to {@link KeyValue.Type#Maximum}
   * @param row - row key (arbitrary byte array)
   * @param family family name
   * @param qualifier column qualifier
   */
  public KeyValue(final byte [] row, final byte [] family,
      final byte [] qualifier) {
-    this(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Put);
+    this(row, family, qualifier, HConstants.LATEST_TIMESTAMP, Type.Maximum);
  }
 
  /**
@@ -569,21 +572,30 @@
   * @return Fully copied clone of this KeyValue
   */
  public KeyValue clone() {
-    byte [] bytes = new byte[this.length];
-    System.arraycopy(this.bytes, this.offset, bytes, 0, this.length);
-    return new KeyValue(bytes, 0, bytes.length);
+    byte [] b = new byte[this.length];
+    System.arraycopy(this.bytes, this.offset, b, 0, this.length);
+    return new KeyValue(b, 0, b.length);
  }
 
  /**
   * Clones a row.
+   * @return Clone of bb's key portion with only row and
+   * {@link HConstants#LATEST_TIMESTAMP} filled in.
+   */
+  public KeyValue cloneRow() {
+    return cloneRow(HConstants.LATEST_TIMESTAMP);
+  }
+
+  /**
+   * Clones a row.
   *
   * @param timestamp The new time stamp for the row.
-   * @return Clone of bb's key portion with only the row and timestamp filled in.
+   * @return Clone of bb's key portion with only row and timestamp filled in.
   */
  public KeyValue cloneRow(final long timestamp) {
    return new KeyValue(getBuffer(), getRowOffset(), getRowLength(),
      null, 0, 0, null, 0, 0,
-      timestamp, Type.codeToType(getType()), null, 0, 0);
+      timestamp, Type.Maximum, null, 0, 0);
  }
 
  /**
@@ -946,17 +958,20 @@
  }
 
  /**
-   * @return True if Delete KeyValue type.
+   * @return True if a delete type, a {@link KeyValue.Type#Delete} or
+   * a {@link KeyValue.Type#DeleteFamily} or a {@link KeyValue.Type#DeleteColumn}
+   * KeyValue type.
   */
-  public boolean isDeleteType() {
-    return getType() == Type.Delete.code;
+  public boolean isDelete() {
+    int t = getType();
+    return Type.Delete.getCode() <= t && t <= Type.DeleteFamily.getCode();
  }
 
  /**
-   * @return True if DeleteColumn KeyValue type.
+   * @return True if this KV is a delete family type.
   */
-  public boolean isDeleteColumnType() {
-    return getType() == Type.DeleteColumn.code;
+  public boolean isDeleteFamily() {
+    return getType() == Type.DeleteFamily.getCode();
  }
 
  /**
@@ -1659,6 +1674,21 @@
  }
 
  /**
+   * Comparator that compares row component only of a KeyValue.
+   */
+  public static class RowComparator implements Comparator {
+    final KVComparator comparator;
+
+    public RowComparator(final KVComparator c) {
+      this.comparator = c;
+    }
+
+    public int compare(KeyValue left, KeyValue right) {
+      return comparator.compareRows(left, right);
+    }
+  }
+
+  /**
   * Compare key portion of a {@link KeyValue} for keys in .META.
   * table.
   */
Index: src/java/org/apache/hadoop/hbase/util/Bytes.java
===================================================================
--- src/java/org/apache/hadoop/hbase/util/Bytes.java (revision 803766)
+++ src/java/org/apache/hadoop/hbase/util/Bytes.java (working copy)
@@ -242,7 +242,7 @@
   * @return String made from b
   */
  public static String toString(final byte [] b) {
-    if(b == null) {
+    if (b == null) {
      return null;
    }
    return toString(b, 0, b.length);
Index: src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
===================================================================
--- src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (revision 803766)
+++ src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (working copy)
@@ -577,7 +577,7 @@
          REGIONINFO_QUALIFIER);
      if (value == null || value.length == 0) {
        throw new IOException("HRegionInfo was null or empty in " +
-          Bytes.toString(parentTable) + ", " + regionInfoRow);
+          Bytes.toString(parentTable) + ", row=" + regionInfoRow);
      }
      // convert the row result into the HRegionLocation we need!
      HRegionInfo regionInfo = (HRegionInfo) Writables.getWritable(
Index: src/java/org/apache/hadoop/hbase/client/Result.java
===================================================================
--- src/java/org/apache/hadoop/hbase/client/Result.java (revision 803766)
+++ src/java/org/apache/hadoop/hbase/client/Result.java (working copy)
@@ -499,4 +499,4 @@
    }
    return results;
  }
-}
+}
\ No newline at end of file
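Illustration only; not part of the patch. The new KeyValue.isDelete() above folds the three delete variants into one bounds check, which relies on the delete type codes forming a contiguous range between Type.Delete and Type.DeleteFamily. A standalone sketch of that check follows, using invented codes; only their relative order matters, and the real byte values live in KeyValue.Type.

// Illustration only; not part of the patch. Invented type codes kept contiguous
// for the delete variants, mirroring the range check in KeyValue.isDelete().
public class DeleteTypeRangeSketch {
  static final int PUT = 4;             // hypothetical code
  static final int DELETE = 8;          // hypothetical code, low end of range
  static final int DELETE_COLUMN = 12;  // hypothetical code, inside the range
  static final int DELETE_FAMILY = 14;  // hypothetical code, high end of range

  static boolean isDelete(final int typeCode) {
    // One comparison pair replaces three equality tests.
    return DELETE <= typeCode && typeCode <= DELETE_FAMILY;
  }

  public static void main(String[] args) {
    System.out.println(isDelete(PUT));            // false
    System.out.println(isDelete(DELETE_COLUMN));  // true
    System.out.println(isDelete(DELETE_FAMILY));  // true
  }
}

The design choice only stays safe as long as no non-delete type code is ever assigned a value inside that range; a new type slotted between Delete and DeleteFamily would silently satisfy the check.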