Index: src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java =================================================================== --- src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java (revision 1170860) +++ src/test/java/org/apache/hadoop/hbase/filter/TestFilterList.java (working copy) @@ -33,6 +33,7 @@ import junit.framework.TestCase; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.filter.Filter.ReturnCode; import org.apache.hadoop.hbase.filter.FilterList.Operator; import org.apache.hadoop.hbase.util.Bytes; @@ -263,7 +264,7 @@ Filter filterMaxHint = new FilterBase() { @Override public KeyValue getNextKeyHint(KeyValue currentKV) { - return new KeyValue(Bytes.toBytes(Long.MAX_VALUE), null, null); + return maxKeyValue; } @Override @@ -324,5 +325,235 @@ Arrays.asList(new Filter [] { filterNoHint, filterMinHint } )); assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null), minKeyValue)); + + // MUST PASS ALL w/ HINT CHANGES ON filterKV call + + final KeyValue badStateKeyValue = new KeyValue(Bytes.toBytes(0L), null, + Bytes.toBytes("badstate")); + final KeyValue goodStateKeyValue = new KeyValue(Bytes.toBytes(0L), null, + Bytes.toBytes("goodstate")); + + Filter filterStateChanger = new FilterBase() { + private boolean stateChanged = false; + + @Override + public ReturnCode filterKeyValue(KeyValue kv) { + stateChanged = true; + return ReturnCode.SEEK_NEXT_USING_HINT; + } + + @Override + public KeyValue getNextKeyHint(KeyValue currentKV) { + return stateChanged ? goodStateKeyValue : badStateKeyValue; + } + + @Override + public void readFields(DataInput arg0) throws IOException {} + + @Override + public void write(DataOutput arg0) throws IOException {} + }; + + // State changer first in the filter list + filterList = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(new Filter [] { filterStateChanger, filterNoHint } )); + filterList.filterKeyValue(minKeyValue); + assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null), + goodStateKeyValue)); + + // State changer last in the filter list + filterList = new FilterList(Operator.MUST_PASS_ALL, + Arrays.asList(new Filter [] { filterNoHint, filterStateChanger } )); + filterList.filterKeyValue(minKeyValue); + assertEquals(0, KeyValue.COMPARATOR.compare(filterList.getNextKeyHint(null), + goodStateKeyValue)); } + + static class ReturnCodeFilter extends FilterBase { + final ReturnCode rc; + + public ReturnCodeFilter(ReturnCode rc) { + this.rc = rc; + } + + @Override + public ReturnCode filterKeyValue(KeyValue v) { + return rc; + } + + @Override + public void readFields(DataInput arg0) throws IOException {} + + @Override + public void write(DataOutput arg0) throws IOException {} + } + + /** + * Test different combinations of filterKV return codes. + * @throws Exception + */ + public void testFilterKeyValueCombos() throws Exception { + + /** + * FilterKeyValue returns a ReturnCode which is one of: + * + * INCLUDE, SKIP, NEXT_COL, NEXT_ROW, SEEK_NEXT_USING_HINT + * + * For an AND condition, you want to keep the most restrictive return code. + * For an OR condition, you want to keep the least restrictive return code. + * + * From least restrictive to most restrictive: + * + * INCLUDE, SKIP, NEXT_COL, SEEK_NEXT_USING_HINT, NEXT_ROW + * + * It's possible that NEXT_COL and SEEK_NEXT_USING_HINT are reversed + * depending on the hint. It is probably more common that seek hints go far + * down a row so it is assumed they are more restrictive. If this is wrong, + * then the query may execute in a suboptimal way but there is no issue with + * correctness. For an AND condition, you can actually use the result of + * any filterKV call and still be correct. For an OR condition, it is + * critical that you pick the least restrictive, so in the case of a SEEK + * and a NEXT_COL, we will fallback to a SKIP. + */ + + Filter includeFilter = new ReturnCodeFilter(ReturnCode.INCLUDE); + Filter skipFilter = new ReturnCodeFilter(ReturnCode.SKIP); + Filter nextColFilter = new ReturnCodeFilter(ReturnCode.NEXT_COL); + Filter seekFilter = new ReturnCodeFilter(ReturnCode.SEEK_NEXT_USING_HINT); + Filter nextRowFilter = new ReturnCodeFilter(ReturnCode.NEXT_ROW); + + // MUST PASS ALL (AND) + + // INCLUDE + assertFilterKeyValueReturnCode(ReturnCode.INCLUDE, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter }); + + // SKIP + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ALL, + new Filter [] { skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ALL, + new Filter [] { skipFilter, includeFilter }); + + // NEXT_COL + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, skipFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ALL, + new Filter [] { nextColFilter, includeFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ALL, + new Filter [] { skipFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ALL, + new Filter [] { nextColFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ALL, + new Filter [] { nextColFilter, includeFilter }); + + // SEEK + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, skipFilter, nextColFilter, seekFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ALL, + new Filter [] { seekFilter, includeFilter, skipFilter, nextColFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, skipFilter, seekFilter, nextColFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, skipFilter, seekFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, seekFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, seekFilter, includeFilter }); + + // NEXT ROW + assertFilterKeyValueReturnCode(ReturnCode.NEXT_ROW, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, seekFilter, skipFilter, nextRowFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_ROW, Operator.MUST_PASS_ALL, + new Filter [] { nextRowFilter, includeFilter, seekFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_ROW, Operator.MUST_PASS_ALL, + new Filter [] { includeFilter, seekFilter, nextRowFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_ROW, Operator.MUST_PASS_ALL, + new Filter [] { nextRowFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_ROW, Operator.MUST_PASS_ALL, + new Filter [] { skipFilter, nextRowFilter, includeFilter, seekFilter }); + + // MUST PASS ONE (OR) + + // NEXT_ROW + assertFilterKeyValueReturnCode(ReturnCode.NEXT_ROW, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter }); + + // SEEK + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ONE, + new Filter [] { seekFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, seekFilter }); + assertFilterKeyValueReturnCode( + ReturnCode.SEEK_NEXT_USING_HINT, Operator.MUST_PASS_ONE, + new Filter [] { seekFilter, nextRowFilter }); + + // NEXT_COL + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, seekFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ONE, + new Filter [] { nextColFilter, nextRowFilter, seekFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ONE, + new Filter [] { seekFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ONE, + new Filter [] { nextColFilter, seekFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.NEXT_COL, Operator.MUST_PASS_ONE, + new Filter [] { nextColFilter, nextRowFilter }); + + // SKIP + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, seekFilter, nextColFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ONE, + new Filter [] { skipFilter, nextRowFilter, seekFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, seekFilter, skipFilter, nextColFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, seekFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.SKIP, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, skipFilter, nextRowFilter }); + + // INCLUDE + assertFilterKeyValueReturnCode(ReturnCode.INCLUDE, Operator.MUST_PASS_ONE, + new Filter [] { nextRowFilter, skipFilter, seekFilter, includeFilter }); + assertFilterKeyValueReturnCode(ReturnCode.INCLUDE, Operator.MUST_PASS_ONE, + new Filter [] { includeFilter, nextRowFilter, seekFilter, skipFilter }); + assertFilterKeyValueReturnCode(ReturnCode.INCLUDE, Operator.MUST_PASS_ONE, + new Filter [] { includeFilter, skipFilter, nextRowFilter }); + assertFilterKeyValueReturnCode(ReturnCode.INCLUDE, Operator.MUST_PASS_ONE, + new Filter [] { includeFilter, seekFilter }); + assertFilterKeyValueReturnCode(ReturnCode.INCLUDE, Operator.MUST_PASS_ONE, + new Filter [] { skipFilter, nextRowFilter, includeFilter, seekFilter }); + } + + /** + * Verifies that putting the specified filters into a FilterList with the + * specified operator will return the specified return code when the + * filterKeyValue method is called. + * + * @param code the expected return code + * @param operator the filter list operator to use + * @param filters the filters to put in a list + */ + private void assertFilterKeyValueReturnCode(ReturnCode code, + Operator operator, Filter [] filters) { + FilterList filterList = new FilterList(operator, Arrays.asList(filters)); + ReturnCode returned = filterList.filterKeyValue(null); + assertTrue("Expected code (" + code + ") but returned (" + returned + ")", + code == returned); + } } Index: src/main/java/org/apache/hadoop/hbase/filter/FilterList.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/filter/FilterList.java (revision 1170860) +++ src/main/java/org/apache/hadoop/hbase/filter/FilterList.java (working copy) @@ -182,36 +182,32 @@ @Override public ReturnCode filterKeyValue(KeyValue v) { - ReturnCode rc = operator == Operator.MUST_PASS_ONE? - ReturnCode.SKIP: ReturnCode.INCLUDE; + // Default MUST_PASS_ONE is most restrictive possible (and then we take the + // least restrictive seen). + // Default MUST_PASS_ALL is least restrictive possible (and then we take the + // most restrictive seen). + ReturnCode rc = operator == Operator.MUST_PASS_ONE ? + ReturnCode.NEXT_ROW: ReturnCode.INCLUDE; for (Filter filter : filters) { if (operator == Operator.MUST_PASS_ALL) { if (filter.filterAllRemaining()) { - return ReturnCode.NEXT_ROW; - } - ReturnCode code = filter.filterKeyValue(v); - switch (code) { - case INCLUDE: + rc = ReturnCode.NEXT_ROW; continue; - case NEXT_ROW: - case SKIP: - return ReturnCode.SKIP; - default: - return code; } + ReturnCode code = filter.filterKeyValue(v); + // If this code is more restrictive than the currently set code, + // update the current return code to this more restrictive one + if (code.isMoreRestrictive(rc)) rc = code; + // Continue in all cases so all filters get called } else if (operator == Operator.MUST_PASS_ONE) { if (filter.filterAllRemaining()) { continue; } - - switch (filter.filterKeyValue(v)) { - case INCLUDE: - rc = ReturnCode.INCLUDE; - // must continue here to evaluate all filters - case NEXT_ROW: - case SKIP: - // continue; - } + ReturnCode code = filter.filterKeyValue(v); + // If this code is less restrictive than the currently set code, + // update the current return code to this less restrictive one + if (code.isLessRestrictive(rc)) rc = code; + // Continue in all cases so all filters get called } } return rc; Index: src/main/java/org/apache/hadoop/hbase/filter/Filter.java =================================================================== --- src/main/java/org/apache/hadoop/hbase/filter/Filter.java (revision 1170860) +++ src/main/java/org/apache/hadoop/hbase/filter/Filter.java (working copy) @@ -96,26 +96,52 @@ /** * Include the KeyValue */ - INCLUDE, + INCLUDE (1), /** * Skip this KeyValue */ - SKIP, + SKIP (2), /** * Skip this column. Go to the next column in this row. */ - NEXT_COL, + NEXT_COL (3), /** + * Seek to next key which is given as hint by the filter. + */ + SEEK_NEXT_USING_HINT (4), + /** * Done with columns, skip to next row. Note that filterRow() will * still be called. */ - NEXT_ROW, + NEXT_ROW (5); + + final int index; + + ReturnCode(int index) { + this.index = index; + } + /** - * Seek to next key which is given as hint by the filter. + * Returns true if this return code is more restrictive than the specified + * return code. + * @param other + * @return true if this is more restrictive than that specified */ - SEEK_NEXT_USING_HINT, -} + public boolean isMoreRestrictive(ReturnCode other) { + return this.index > other.index; + } + /** + * Returns true if this return code is less restrictive than the specified + * return code. + * @param other + * @return true if this is less restrictive than that specified + */ + public boolean isLessRestrictive(ReturnCode other) { + return this.index < other.index; + } + } + /** * Chance to alter the list of keyvalues to be submitted. * Modifications to the list will carry on