From f0574e647224c2feb7f14acbffc52841ee54526a Mon Sep 17 00:00:00 2001 From: Nick Dimiduk Date: Thu, 9 Oct 2014 18:23:25 -0700 Subject: [PATCH] HBASE-12222 WIP FuzzyRowFilter unpredictable with jagged rowkeys --- .../hadoop/hbase/filter/TestFuzzyRowFilter.java | 293 +++++++++++++++++++++ 1 file changed, 293 insertions(+) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java index 9f83444..c6e653d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java @@ -17,16 +17,303 @@ */ package org.apache.hadoop.hbase.filter; +import com.google.common.base.Function; +import com.google.common.base.Objects; +import com.google.common.collect.Collections2; +import com.google.common.collect.Lists; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Durability; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RegionScanner; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.AfterClass; import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; +import 
java.util.TreeSet; + @Category(SmallTests.class) public class TestFuzzyRowFilter { + private static final Log LOG = LogFactory.getLog(TestFuzzyRowFilter.class); + + private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + private static final byte[] CF = Bytes.toBytes("f"); + private static final byte[] QUAL = Bytes.toBytes("qual"); + private static final byte[] VAL = Bytes.toBytes("val"); + + private static HRegion REGION_1 = null; + private static HRegion REGION_2 = null; + + private static final Set<byte[]> REGION_1_ROWS_1 = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("testRowOne-0")); + add(Bytes.toBytes("testRowOne-1")); + add(Bytes.toBytes("testRowOne-2")); + add(Bytes.toBytes("testRowOne-3")); + }}; + + private static final Set<byte[]> REGION_1_ROWS_2 = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("testRowTwo-0")); + add(Bytes.toBytes("testRowTwo-1")); + add(Bytes.toBytes("testRowTwo-2")); + add(Bytes.toBytes("testRowTwo-3")); + }}; + + private static final Set<byte[]> REGION_1_ROWS_3 = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("testRowThree-0")); + add(Bytes.toBytes("testRowThree-1")); + add(Bytes.toBytes("testRowThree-2")); + add(Bytes.toBytes("testRowThree-3")); + }}; + + private static final Set<byte[]> REGION_1_ROWS_4 = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("testRowFour-0")); + add(Bytes.toBytes("testRowFour-1")); + add(Bytes.toBytes("testRowFour-2")); + add(Bytes.toBytes("testRowFour-3")); + }}; + + private static final Set<byte[]> REGION_1_ROWS = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + addAll(REGION_1_ROWS_1); + addAll(REGION_1_ROWS_2); + addAll(REGION_1_ROWS_3); + addAll(REGION_1_ROWS_4); + }}; + + /** + * Creates a relatively fixed-width schema: + * <pre>
+   * testRowFour-0
+   * testRowFour-1
+   * testRowFour-2
+   * testRowFour-3
+   * testRowOne-0
+   * testRowOne-1
+   * testRowOne-2
+   * testRowOne-3
+   * ...
+   * </pre>
+ */ + private static HRegion createRegion1(TableName table) throws IOException { + HTableDescriptor htd = new HTableDescriptor(table); + htd.addFamily(new HColumnDescriptor(CF)); + HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false); + HRegion r = HRegion.createHRegion(info, TEST_UTIL.getDataTestDir(), + TEST_UTIL.getConfiguration(), htd); + + for (byte[] row : REGION_1_ROWS) { + Put p = new Put(row); + p.setDurability(Durability.SKIP_WAL); + p.add(CF, QUAL, VAL); + r.put(p); + } + r.flushcache(); + return r; + } + + /** + * Creates a variable-length rowkey schema: + * <pre>
+   * 0
+   * 0/0
+   * 0/1
+   * 0/2
+   * 1
+   * 1/0
+   * 1/1
+   * 1/2
+   * ...
+   * 2/2
+   * </pre>
+ */ + private static HRegion createRegion2(TableName table) throws IOException { + HTableDescriptor htd = new HTableDescriptor(table); + htd.addFamily(new HColumnDescriptor(CF)); + HRegionInfo info = new HRegionInfo(htd.getTableName(), null, null, false); + HRegion r = HRegion.createHRegion(info, TEST_UTIL.getDataTestDir(), + TEST_UTIL.getConfiguration(), htd); + + for (int i = 0; i < 3; i++) { + String i_s = Integer.toString(i); + Put p = new Put(Bytes.toBytes(i_s)); + p.add(CF, QUAL, VAL); + r.put(p); + for (int j = 0; j < 3; j++) { + String j_s = Integer.toString(j); + p = new Put(Bytes.toBytes(String.format("%s/%s", i_s, j_s))); + p.add(CF, QUAL, VAL); + r.put(p); + } + } + r.flushcache(); + return r; + } + + private static void cleanupRegion(HRegion region) throws IOException { + if (region == null) { + Assert.fail("Should never get here."); + } + HRegion.closeHRegion(region); + } + + @BeforeClass + public static void beforeClass() throws IOException { + REGION_1 = createRegion1(TableName.valueOf("TestFuzzyRowFilter1")); + REGION_2 = createRegion2(TableName.valueOf("TestFuzzyRowFilter2")); + } + + @AfterClass + public static void afterClass() throws IOException { + cleanupRegion(REGION_1); + cleanupRegion(REGION_2); + } + + /** Collect the rowkeys produced when scanning {@code region} with {@code s}, as a set sorted by {@code Bytes.BYTES_COMPARATOR}. 
*/ + private Set<byte[]> getRows(HRegion region, Scan s) throws IOException { + LOG.info(Objects.toStringHelper(region.getTableDesc().getTableName().toString()) + .add("scan", s.toString())); + RegionScanner scanner = null; + try { + scanner = region.getScanner(s); + List<Cell> results = new ArrayList<Cell>(); + boolean hasMore = false; + Set<byte[]> ret = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR); + do { + results.clear(); + hasMore = scanner.next(results); + if (results.size() > 0) { + Cell c = results.get(0); + LOG.info("received result row: " + Bytes.toString(c.getRowArray(), c.getRowOffset(), + c.getRowLength())); + byte[] row = new byte[c.getRowLength()]; + System.arraycopy(c.getRowArray(), c.getRowOffset(), row, 0, c.getRowLength()); + ret.add(row); + } + } while (hasMore); + return ret; + } finally { + if (scanner != null) scanner.close(); + } + } + + private String[] setToStrings(Set<byte[]> s) { + String[] a = new String[s.size()]; + return Collections2.transform(s, new Function<byte[], String>(){ + @Override + public String apply(byte[] input) { + return Bytes.toStringBinary(input); + } + }).toArray(a); + } + + private void assertFuzzyRow(String message, HRegion region, byte[] fuzzyKey, byte[] mask, + Set<byte[]> expected, boolean reverse) throws IOException { + Pair<byte[], byte[]> pair = new Pair<byte[], byte[]>(fuzzyKey, mask); + Filter f = new FuzzyRowFilter(Lists.newArrayList(pair)); + Scan s = new Scan(); + s.setFilter(f); + s.setReversed(reverse); + String[] e = new String[expected.size()]; + Assert.assertArrayEquals(message, setToStrings(expected), setToStrings(getRows(region, s))); + } + + private void doTestRegion1(boolean reverse) throws IOException { + LOG.info("Running region1 test " + (reverse ? "reverse" : "forward")); + assertFuzzyRow("simple prefix at beginning of table", REGION_1, + Bytes.toBytes("testRow?"), + new byte[]{0, 0, 0, 0, 0, 0, 0, 1}, + REGION_1_ROWS, + reverse); + assertFuzzyRow("testRow? should be the same as a PrefixFilter(testRow)", REGION_1, + Bytes.toBytes("testRow?"), + new byte[]{0, 0, 0, 0, 0, 0, 0, 1}, + getRows(REGION_1, new Scan().setFilter(new PrefixFilter(Bytes.toBytes("testRow")))), + reverse); + assertFuzzyRow("just testRowOne, testRowTwo", REGION_1, + Bytes.toBytes("testRow???-?"), + new byte[]{0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, + new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + addAll(REGION_1_ROWS_1); + addAll(REGION_1_ROWS_2); + }}, + reverse); + assertFuzzyRow("simple prefix in middle of table", REGION_1, + Bytes.toBytes("testRowThree-?"), + new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, + REGION_1_ROWS_3, + reverse); + assertFuzzyRow("testRowThree-? should be the same as [testRowThree, testRowTwo)", REGION_1, + Bytes.toBytes("testRowThree-?"), + new byte[]{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, + getRows(REGION_1, new Scan() + .setStartRow(Bytes.toBytes("testRowThree")) + .setStopRow(Bytes.toBytes("testRowTwo"))), + reverse); + } + + @Test + public void testRegion1() throws IOException { + doTestRegion1(false); + doTestRegion1(true); + } + + private void doTestRegion2(boolean reverse) throws IOException { + LOG.info("Running region2 test " + (reverse ? 
"reverse" : "forward")); + assertFuzzyRow("should be '1', '1/0', '1/1', '1/2'", REGION_2, + Bytes.toBytes("1"), new byte[] { 0 }, + new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("1")); + add(Bytes.toBytes("1/0")); + add(Bytes.toBytes("1/1")); + add(Bytes.toBytes("1/2")); + }}, reverse); + assertFuzzyRow("should be '1/0', '1/1', '1/2'", REGION_2, + Bytes.toBytes("1/"), new byte[] { 0, 0 }, + new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("1/0")); + add(Bytes.toBytes("1/1")); + add(Bytes.toBytes("1/2")); + }}, reverse); + assertFuzzyRow("should be '0/2', '1/2', '2/2'", REGION_2, + Bytes.toBytes("?/2"), new byte[] { 1, 0, 0 }, + new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR) {{ + add(Bytes.toBytes("0/2")); + add(Bytes.toBytes("1/2")); + add(Bytes.toBytes("2/2")); + }}, reverse); + } + + @Test + public void testRegion2() throws IOException { + doTestRegion2(false); + doTestRegion2(true); + } + @Test public void testSatisfiesForward() { + // nextHint should return {1} + Assert.assertEquals(FuzzyRowFilter.SatisfiesCode.NEXT_EXISTS, + FuzzyRowFilter.satisfies(new byte[]{0}, // row to check + new byte[]{0, 0}, // fuzzy row + new byte[]{1, 0})); // mask + Assert.assertEquals(FuzzyRowFilter.SatisfiesCode.NEXT_EXISTS, FuzzyRowFilter.satisfies(false, new byte[]{1, (byte) -128, 0, 0, 1}, // row to check @@ -166,6 +453,12 @@ public class TestFuzzyRowFilter { @Test public void testGetNextForFuzzyRuleForward() { assertNext(false, + new byte[]{0, 0}, + new byte[]{1, 0}, + new byte[]{0}, + new byte[]{1}); + + assertNext(false, new byte[]{0, 1, 2}, // fuzzy row new byte[]{1, 0, 0}, // mask new byte[]{1, 2, 1, 0, 1}, // current -- 1.9.0