Index: hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java
===================================================================
--- hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java (revision 1371304)
+++ hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java (working copy)
@@ -18,12 +18,16 @@
 package org.apache.hadoop.hbase.filter;
 
 import org.apache.hadoop.hbase.SmallTests;
+import org.apache.hadoop.hbase.client.Scan;
 import org.junit.Assert;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 @Category(SmallTests.class)
 public class TestFuzzyRowFilter {
+  // max byte value (lexicographically): 0xFF, which is -1 as a signed Java byte
+  private static final byte M = (byte) 255;
+
   @Test
   public void testSatisfies() {
     Assert.assertEquals(FuzzyRowFilter.SatisfiesCode.NEXT_EXISTS,
@@ -123,26 +127,26 @@
     assertNext(
         new byte[]{0, 1, 0, 0}, // fuzzy row
         new byte[]{1, 0, 1, 1}, // mask
-        new byte[]{5, 1, (byte) 255, 1}, // current
-        new byte[]{5, 1, (byte) 255, 2}); // expected next
+        new byte[]{5, 1, M, 1}, // current
+        new byte[]{5, 1, M, 2}); // expected next
 
     assertNext(
         new byte[]{0, 1, 0, 1}, // fuzzy row
         new byte[]{1, 0, 1, 0}, // mask
-        new byte[]{5, 1, (byte) 255, 1}, // current
+        new byte[]{5, 1, M, 1}, // current
         new byte[]{6, 1, 0, 1}); // expected next
 
     assertNext(
         new byte[]{0, 1, 0, 1}, // fuzzy row
         new byte[]{1, 0, 1, 0}, // mask
-        new byte[]{5, 1, (byte) 255, 0}, // current
-        new byte[]{5, 1, (byte) 255, 1}); // expected next
+        new byte[]{5, 1, M, 0}, // current
+        new byte[]{5, 1, M, 1}); // expected next
 
     assertNext(
         new byte[]{5, 1, 1, 0},
         new byte[]{0, 0, 1, 1},
-        new byte[]{5, 1, (byte) 255, 1},
-        new byte[]{5, 1, (byte) 255, 2});
+        new byte[]{5, 1, M, 1},
+        new byte[]{5, 1, M, 2});
 
     assertNext(
         new byte[]{1, 1, 1, 1},
@@ -198,6 +202,72 @@
     Assert.assertArrayEquals(expected, nextForFuzzyRule);
   }
 
+  /**
+   * Checks getNextForFuzzyRangeRule with a mask whose fixed positions (0) form two groups,
+   * indexes 3-5 and 8-9, bounded by the first and second fuzzy rows.
+   */
+  @Test
+  public void testGetNextForFuzzyRangeRule() {
+    // last fixed byte (6) is below its upper bound (9): it is simply incremented
+    assertNextForRange(
+        new byte[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 1}, // fuzzy row first
+        new byte[]{0, 0, 0, 0, 9, 9, 0, 0, 0, 9}, // fuzzy row second
+        new byte[]{1, 1, 1, 0, 0, 0, 1, 1, 0, 0}, // mask
+        new byte[]{1, 2, 3, 0, 1, 7, 4, 5, 0, 6}, // current
+        new byte[]{1, 2, 3, 0, 1, 7, 4, 5, 0, 7}); // expected next
+
+    // last group at upper bound: bump the non-fixed byte before it, reset group to lower bound
+    assertNextForRange(
+        new byte[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+        new byte[]{0, 0, 0, 0, 9, 9, 0, 0, 0, 9},
+        new byte[]{1, 1, 1, 0, 0, 0, 1, 1, 0, 0},
+        new byte[]{1, 2, 3, 0, 1, 7, 4, 5, 0, 9},
+        new byte[]{1, 2, 3, 0, 1, 7, 4, 6, 0, 1});
+
+    // non-fixed bytes at max too: bump the last byte of the first fixed group
+    assertNextForRange(
+        new byte[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+        new byte[]{0, 0, 0, 0, 9, 9, 0, 0, 0, 9},
+        new byte[]{1, 1, 1, 0, 0, 0, 1, 1, 0, 0},
+        new byte[]{1, 2, 3, 0, 1, 7, M, M, 0, 9},
+        new byte[]{1, 2, 3, 0, 1, 8, 0, 0, 0, 1});
+
+    // that byte is at max as well: carry into the previous byte of the same fixed group
+    assertNextForRange(
+        new byte[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+        new byte[]{0, 0, 0, 0, 9, 9, 0, 0, 0, 9},
+        new byte[]{1, 1, 1, 0, 0, 0, 1, 1, 0, 0},
+        new byte[]{1, 2, 3, 0, 1, M, M, M, 0, 9},
+        new byte[]{1, 2, 3, 0, 2, 0, 0, 0, 0, 1});
+
+    // a byte of a fixed group may exceed its own upper bound once an earlier byte is below it
+    assertNextForRange(
+        new byte[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+        new byte[]{0, 0, 0, 0, 9, 9, 0, 0, 0, 9},
+        new byte[]{1, 1, 1, 0, 0, 0, 1, 1, 0, 0},
+        new byte[]{1, 2, 3, 0, 8, M-1, M, M, 0, 9},
+        new byte[]{1, 2, 3, 0, 8, M, 0, 0, 0, 1});
+
+    // first fixed group at upper bound too: bump a leading non-fixed byte, reset both groups
+    assertNextForRange(
+        new byte[]{0, 0, 0, 0, 0, 1, 0, 0, 0, 1},
+        new byte[]{0, 0, 0, 0, 9, 9, 0, 0, 0, 9},
+        new byte[]{1, 1, 1, 0, 0, 0, 1, 1, 0, 0},
+        new byte[]{1, 2, 3, 0, 9, 9, M, M, 0, 9},
+        new byte[]{1, 2, 4, 0, 0, 1, 0, 0, 0, 1});
+  }
+
+  /**
+   * Asserts that FuzzyRowFilter.getNextForFuzzyRangeRule returns the expected row key for the
+   * given current row, range bounds (first/second fuzzy rows) and mask.
+   */
+  private void assertNextForRange(byte[] fuzzyRowFirst, byte[] fuzzyRowSecond, byte[] mask,
+                                  byte[] current, byte[] expected) {
+    byte[] nextForFuzzyRule =
+        FuzzyRowFilter.getNextForFuzzyRangeRule(current, fuzzyRowFirst, fuzzyRowSecond, mask);
+    Assert.assertArrayEquals(expected, nextForFuzzyRule);
+  }
+
   @org.junit.Rule
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
Index: hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java
===================================================================
--- hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java (revision 1371304)
+++ hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java (working copy)
@@ -183,6 +183,7 @@
     return satisfies(row, 0, row.length, fuzzyKeyBytes, fuzzyKeyMeta);
   }
 
+  // TODO: do we need offset & length?
   private static SatisfiesCode satisfies(byte[] row, int offset, int length,
                                          byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
     if (row == null) {
@@ -235,6 +236,7 @@
    * @return greater byte array than given (row) which satisfies the fuzzy rule if it exists,
    *         null otherwise
    */
+  // TODO: do we need offset & length?
   private static byte[] getNextForFuzzyRule(byte[] row, int offset, int length,
                                             byte[] fuzzyKeyBytes, byte[] fuzzyKeyMeta) {
     // To find out the next "smallest" byte array that satisfies fuzzy rule and "greater" than
@@ -291,4 +293,101 @@
     return result;
   }
 
+  /**
+   * Computes the next row key to try after the given row for a fuzzy rule whose fixed positions
+   * are bounded by a range instead of a single value.
+   * <p>
+   * A position is "fixed" where fuzzyKeyMeta is 0 and "non-fixed" where it is 1 or where it lies
+   * past the end of fuzzyKeyMeta. The last position found that can still be increased is
+   * incremented; everything to the right of it is reset to its smallest value: 0 for non-fixed
+   * positions and for fixed positions in the same group as the incremented one, the byte from
+   * fuzzyKeyBytesFirst for fixed positions in later groups. Fixed bytes are compared with
+   * fuzzyKeyBytesSecond as unsigned values.
+   * <p>
+   * NOTE(review): fixed bytes of the row are never compared with fuzzyKeyBytesFirst, so a row
+   * that is below the lower bound is not moved up to it - confirm whether that is intended.
+   *
+   * @param row current row key; it is copied, not modified
+   * @param fuzzyKeyBytesFirst smallest allowed bytes for the fixed positions
+   * @param fuzzyKeyBytesSecond greatest allowed bytes for the fixed positions
+   * @param fuzzyKeyMeta mask: 0 marks a fixed position, 1 a non-fixed one
+   * @return the next row key, or null when no position of the row can be increased
+   */
+  public static byte[] getNextForFuzzyRangeRule(byte[] row,
+      byte[] fuzzyKeyBytesFirst, byte[] fuzzyKeyBytesSecond,
+      byte[] fuzzyKeyMeta) {
+
+    // TODO: handle case when given row is smaller than fuzzy keys. Is this handled by the line below?
+    byte[] result = Arrays.copyOf(row,
+        row.length > fuzzyKeyBytesFirst.length ? row.length : fuzzyKeyBytesFirst.length);
+
+    int toInc = -1;
+    boolean canChangeFixed = false;
+    for (int i = 0; i < result.length; i++) {
+      // a) not fixed byte case
+      if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
+        canChangeFixed = false;
+        if (!isMax(result[i])) {
+          toInc = i;
+        }
+        continue;
+      }
+
+      // b) fixed byte case
+      if (!canChangeFixed) {
+        // 0xFF is the greatest byte here, so compare as unsigned values: a plain byte
+        // comparison is signed and would rank 0x80..0xFF below 0x00..0x7F.
+        int current = result[i] & 0xFF;
+        int upperBound = fuzzyKeyBytesSecond[i] & 0xFF;
+        if (current > upperBound) {
+          // We encountered bigger byte than fixed which we cannot change.
+          // Now it depends on toInc: if it is -1, then our row is bigger than any which satisfies
+          // the fuzzy rule
+          break;
+        }
+        // by algorithm we should check if row[i] is not at max value, but we can omit that since
+        // its value less than some others - it is not at max value ;)
+        if (current < upperBound) {
+          canChangeFixed = true;
+          toInc = i;
+        }
+      } else {
+        if (!isMax(result[i])) {
+          toInc = i;
+        }
+      }
+    }
+
+    if (toInc < 0) {
+      // our row is bigger than any which satisfies the fuzzy rule
+      return null;
+    }
+
+    result[toInc]++;
+
+    // Setting all "non-fixed" positions to zeroes to the right of the one we increased so
+    // that found "next" row key is the smallest possible
+    // If toInc is "fixed" position then setting all fixed from this group to zeroes and all fixed
+    // in other groups to the smallest values from fuzzyKeyBytesFirst
+    // Positions past the end of the mask are non-fixed; toInc may point at one when the row is
+    // longer than the mask, so check the bound before reading fuzzyKeyMeta[toInc].
+    boolean increasedFixedPosition = toInc < fuzzyKeyMeta.length && fuzzyKeyMeta[toInc] == 0;
+    boolean setBytesOfThisFixedGroupToZeroes = increasedFixedPosition;
+    for (int i = toInc + 1; i < result.length; i++) {
+      if (i >= fuzzyKeyMeta.length || fuzzyKeyMeta[i] == 1) {
+        result[i] = 0;
+        // resetting, so that next groups are set to their minimum values, but not zeroes
+        setBytesOfThisFixedGroupToZeroes = false;
+        continue;
+      }
+
+      if (setBytesOfThisFixedGroupToZeroes) {
+        result[i] = 0;
+      } else {
+        result[i] = fuzzyKeyBytesFirst[i];
+      }
+    }
+
+    return result;
+  }
 }