Index: src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java
===================================================================
--- src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java (revision 557761)
+++ src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRegExpRowFilter.java (working copy)
@@ -19,16 +19,21 @@
  */
 package org.apache.hadoop.hbase.filter;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
 import java.util.Map;
 import java.util.TreeMap;
 
 import junit.framework.TestCase;
 
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.io.Text;
 
 public class TestRegExpRowFilter extends TestCase {
   TreeMap colvalues;
-  RowFilterInterface filter;
+  RowFilterInterface mainFilter;
   final char FIRST_CHAR = 'a';
   final char LAST_CHAR = 'e';
   byte [] GOOD_BYTES = "abc".getBytes();
@@ -41,10 +46,43 @@
     for (char c = FIRST_CHAR; c < LAST_CHAR; c++) {
       colvalues.put(new Text(new String(new char [] {c})), GOOD_BYTES);
     }
-    this.filter = new RegExpRowFilter(HOST_PREFIX + ".*", colvalues);
+    this.mainFilter = new RegExpRowFilter(HOST_PREFIX + ".*", colvalues);
   }
 
   public void testRegexOnRow() throws Exception {
+    regexRowTests(mainFilter);
+  }
+
+  public void testRegexOnRowAndColumn() throws Exception {
+    regexRowColumnTests(mainFilter);
+  }
+
+  public void testFilterNotNull() throws Exception {
+    filterNotNullTests(mainFilter);
+  }
+
+  public void testSerialization() throws Exception {
+    // Decompose mainFilter to bytes.
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(stream);
+    mainFilter.write(out);
+    out.close();
+    byte[] buffer = stream.toByteArray();
+
+    // Recompose filter.
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
+    RowFilterInterface newFilter = new RegExpRowFilter();
+    newFilter.readFields(in);
+
+    // Ensure the serialization preserved the filter by running all tests.
+    regexRowTests(newFilter);
+    newFilter.reset();
+    regexRowColumnTests(newFilter);
+    newFilter.reset();
+    filterNotNullTests(newFilter);
+  }
+
+  private void regexRowTests(RowFilterInterface filter) throws Exception {
     for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
       Text t = createRow(c);
       assertFalse("Failed with character " + c, filter.filter(t));
@@ -54,12 +92,12 @@
       yahooSite, filter.filter(new Text(yahooSite)));
   }
 
-  public void testRegexOnRowAndColumn() throws Exception {
+  private void regexRowColumnTests(RowFilterInterface filter) {
     for (char c = FIRST_CHAR; c <= LAST_CHAR; c++) {
       Text t = createRow(c);
       for (Map.Entry e: this.colvalues.entrySet()) {
         assertFalse("Failed on " + c,
-          this.filter.filter(t, e.getKey(), e.getValue()));
+          filter.filter(t, e.getKey(), e.getValue()));
       }
     }
     // Try a row and column I know will pass.
@@ -68,17 +106,60 @@
     Text col = new Text(Character.toString(c));
     assertFalse("Failed with character " + c,
       filter.filter(r, col, GOOD_BYTES));
+    // Do same but with bad bytes.
     assertTrue("Failed with character " + c,
       filter.filter(r, col, "badbytes".getBytes()));
+    // Do with good bytes but bad column name. Should not filter out.
    assertFalse("Failed with character " + c,
      filter.filter(r, new Text("badcolumn"), GOOD_BYTES));
+    // Good column, good bytes but bad row.
assertTrue("Failed with character " + c, filter.filter(new Text("bad row"), new Text("badcolumn"), GOOD_BYTES)); } - + + private void filterNotNullTests(RowFilterInterface filter) throws Exception { + // Modify the filter to expect certain columns to be null: + // Expecting a row WITH columnKeys: a-d, WITHOUT columnKey: e + ((RegExpRowFilter)filter).setColumnFilter(new Text(new String(new char[] { + LAST_CHAR })), null); + + char secondToLast = (char)(LAST_CHAR - 1); + char thirdToLast = (char)(LAST_CHAR - 2); + + // Modify the row to be missing an expected columnKey (d) + colvalues.remove(new Text(new String(new char[] { secondToLast }))); + + // Try a row that is missing an expected columnKey. + // Testing row with columnKeys: a-c + assertTrue("Failed with last columnKey " + thirdToLast, filter. + filterNotNull(colvalues)); + + // Try a row that has all expected columnKeys, and NO null-expected + // columnKeys. + // Testing row with columnKeys: a-d + colvalues.put(new Text(new String(new char[] { secondToLast })), + GOOD_BYTES); + assertFalse("Failed with last columnKey " + secondToLast, filter. + filterNotNull(colvalues)); + + // Try a row that has all expected columnKeys AND a null-expected columnKey. + // Testing row with columnKeys: a-e + colvalues.put(new Text(new String(new char[] { LAST_CHAR })), GOOD_BYTES); + assertTrue("Failed with last columnKey " + LAST_CHAR, filter. + filterNotNull(colvalues)); + + // Try a row that has all expected columnKeys and a null-expected columnKey + // that maps to a null value. + // Testing row with columnKeys: a-e, e maps to null + colvalues.put(new Text(new String(new char[] { LAST_CHAR })), + HConstants.DELETE_BYTES.get()); + assertFalse("Failed with last columnKey " + LAST_CHAR + " mapping to null.", + filter.filterNotNull(colvalues)); + } + private Text createRow(final char c) { return new Text(HOST_PREFIX + Character.toString(c)); } Index: src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRowFilterSet.java =================================================================== --- src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRowFilterSet.java (revision 0) +++ src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestRowFilterSet.java (revision 0) @@ -0,0 +1,159 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.filter; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.util.HashSet; +import java.util.Set; +import java.util.TreeMap; + +import org.apache.hadoop.io.Text; + + +import junit.framework.TestCase; + +public class TestRowFilterSet extends TestCase { + + RowFilterInterface filterMPALL; + RowFilterInterface filterMPONE; + static final int MAX_PAGES = 5; + final char FIRST_CHAR = 'a'; + final char LAST_CHAR = 'e'; + final byte[] GOOD_BYTES = "abc".getBytes(); + final byte[] BAD_BYTES = "def".getBytes(); + TreeMap colvalues; + + protected void setUp() throws Exception { + super.setUp(); + + colvalues = new TreeMap(); + for (char c = FIRST_CHAR; c < LAST_CHAR; c++) { + colvalues.put(new Text(new String(new char [] {c})), GOOD_BYTES); + } + + Set filters = new HashSet(); + filters.add(new PageRowFilter(MAX_PAGES)); + filters.add(new RegExpRowFilter(".*regex.*", colvalues)); + filters.add(new WhileMatchRowFilter(new StopRowFilter(new Text("yyy")))); + filters.add(new WhileMatchRowFilter(new RegExpRowFilter(".*match.*"))); + filterMPALL = new RowFilterSet(RowFilterSet.Operator.MUST_PASS_ALL, + filters); + filterMPONE = new RowFilterSet(RowFilterSet.Operator.MUST_PASS_ONE, + filters); + } + + public void testMPONE() throws Exception { + MPONETests(filterMPONE); + } + + public void testMPALL() throws Exception { + MPALLTests(filterMPALL); + } + + public void testSerialization() throws Exception { + // Decompose filterMPALL to bytes. + ByteArrayOutputStream stream = new ByteArrayOutputStream(); + DataOutputStream out = new DataOutputStream(stream); + filterMPALL.write(out); + out.close(); + byte[] buffer = stream.toByteArray(); + + // Recompose filterMPALL. + DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer)); + RowFilterInterface newFilter = new RowFilterSet(); + newFilter.readFields(in); + + // Ensure the serialization preserved the filter by running a full test. + MPALLTests(newFilter); + } + + private void MPONETests(RowFilterInterface filter) throws Exception { + // A row that shouldn't cause any filters to return true. + RFSAssertion(filter, "regex_match", false); + + // A row that should cause the WhileMatchRowFilter to filter all remaining. + RFSAssertion(filter, "regex_only", false); + + // Make sure the overall filterAllRemaining is unchanged (correct for + // MUST_PASS_ONE). + assertFalse(filter.filterAllRemaining()); + + // A row that should cause the RegExpRowFilter to fail and the + // StopRowFilter to filter all remaining. + RFSAssertion(filter, "yyy_match", false); + + // Accept several more rows such that PageRowFilter will exceed its limit. + for (int i=0; i<=MAX_PAGES-3; i++) + filter.rowProcessed(false, new Text("unimportant_key")); + + // A row that should cause the RegExpRowFilter to filter this row, making + // all the filters return true and thus the RowFilterSet as well. + RFSAssertion(filter, "bad_column", true); + + // Make sure the overall filterAllRemaining is unchanged (correct for + // MUST_PASS_ONE). + assertFalse(filter.filterAllRemaining()); + } + + private void MPALLTests(RowFilterInterface filter) throws Exception { + // A row that shouldn't cause any filters to return true. + RFSAssertion(filter, "regex_match", false); + + // A row that should cause WhileMatchRowFilter to filter all remaining. 
+    RFSAssertion(filter, "regex_only", true);
+
+    // Make sure the overall filterAllRemaining is changed (correct for
+    // MUST_PASS_ALL).
+    RFSAssertReset(filter);
+
+    // A row that should cause the RegExpRowFilter to fail and the
+    // StopRowFilter to filter all remaining.
+    RFSAssertion(filter, "yyy_match", true);
+
+    // Make sure the overall filterAllRemaining is changed (correct for
+    // MUST_PASS_ALL).
+    RFSAssertReset(filter);
+
+    // A row that should cause the RegExpRowFilter to fail.
+    boolean filtered = filter.filter(new Text("regex_match"),
+      new Text(new String(new char[] { FIRST_CHAR })), BAD_BYTES);
+    assertTrue("Filtering on 'regex_match' and bad column data.", filtered);
+    filter.rowProcessed(filtered, new Text("regex_match"));
+  }
+
+  private void RFSAssertion(RowFilterInterface filter, String toTest,
+    boolean assertTrue) throws Exception {
+    Text testText = new Text(toTest);
+    boolean filtered = filter.filter(testText);
+    assertTrue("Filtering on '" + toTest + "'",
+      assertTrue ? filtered : !filtered);
+    filter.rowProcessed(filtered, testText);
+  }
+
+  private void RFSAssertReset(RowFilterInterface filter) throws Exception {
+    assertTrue(filter.filterAllRemaining());
+    // Reset for continued testing
+    filter.reset();
+    assertFalse(filter.filterAllRemaining());
+  }
+}
Index: src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java
===================================================================
--- src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java (revision 557761)
+++ src/contrib/hbase/src/test/org/apache/hadoop/hbase/filter/TestPageRowFilter.java (working copy)
@@ -19,34 +19,67 @@
  */
 package org.apache.hadoop.hbase.filter;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+
 import org.apache.hadoop.io.Text;
 
 import junit.framework.TestCase;
 
 public class TestPageRowFilter extends TestCase {
+
+  RowFilterInterface mainFilter;
+  final int ROW_LIMIT = 3;
+
+  protected void setUp() throws Exception {
+    super.setUp();
+    mainFilter = new PageRowFilter(ROW_LIMIT);
+  }
+
   public void testPageSize() throws Exception {
-    final int pageSize = 3;
-    RowFilterInterface filter = new PageRowFilter(pageSize);
-    testFiltersBeyondPageSize(filter, pageSize);
+    pageSizeTests(mainFilter);
+  }
+
+  public void testSerialization() throws Exception {
+    // Decompose mainFilter to bytes.
+    ByteArrayOutputStream stream = new ByteArrayOutputStream();
+    DataOutputStream out = new DataOutputStream(stream);
+    mainFilter.write(out);
+    out.close();
+    byte[] buffer = stream.toByteArray();
+
+    // Recompose mainFilter.
+    DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer));
+    RowFilterInterface newFilter = new PageRowFilter();
+    newFilter.readFields(in);
+
+    // Ensure the serialization preserved the filter by running a full test.
+    pageSizeTests(newFilter);
+  }
+
+  private void pageSizeTests(RowFilterInterface filter) throws Exception {
+    testFiltersBeyondPageSize(filter, ROW_LIMIT);
    // Test reset works by going in again.
filter.reset(); - testFiltersBeyondPageSize(filter, pageSize); + testFiltersBeyondPageSize(filter, ROW_LIMIT); } private void testFiltersBeyondPageSize(final RowFilterInterface filter, - final int pageSize) { + final int pageSize) { for (int i = 0; i < (pageSize * 2); i++) { Text row = new Text(Integer.toString(i)); boolean filterOut = filter.filter(row); if (!filterOut) { assertFalse("Disagrees with 'filter'", filter.filterAllRemaining()); - filter.acceptedRow(row); } else { // Once we have all for a page, calls to filterAllRemaining should // stay true. assertTrue("Disagrees with 'filter'", filter.filterAllRemaining()); assertTrue(i >= pageSize); } + filter.rowProcessed(filterOut, row); } } } Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java =================================================================== --- src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (revision 557761) +++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RegExpRowFilter.java (working copy) @@ -31,6 +31,8 @@ import java.util.Map.Entry; import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.io.Text; @@ -47,6 +49,8 @@ private Map equalsMap = new HashMap(); private Set nullColumns = new HashSet(); + static final Log LOG = LogFactory.getLog(RegExpRowFilter.class); + /** * Default constructor, filters nothing. Required though for RPC * deserialization. @@ -80,11 +84,19 @@ * * {@inheritDoc} */ - public void acceptedRow(@SuppressWarnings("unused") final Text key) { + public void rowProcessed(boolean filtered, Text rowKey) { //doesn't care } /** + * + * {@inheritDoc} + */ + public boolean processAlways() { + return false; + } + + /** * Specify a value that must be matched for the given column. * * @param colKey @@ -93,7 +105,7 @@ * the value that must equal the stored value. 
*/ public void setColumnFilter(final Text colKey, final byte[] value) { - if (null == value) { + if (value == null) { nullColumns.add(colKey); } else { equalsMap.put(colKey, value); @@ -139,7 +151,11 @@ */ public boolean filter(final Text rowKey) { if (filtersByRowKey() && rowKey != null) { - return !getRowKeyPattern().matcher(rowKey.toString()).matches(); + boolean result = !getRowKeyPattern().matcher(rowKey.toString()).matches(); + if (LOG.isDebugEnabled()) { + LOG.debug("filter returning " + result + " for rowKey: " + rowKey); + } + return result; } return false; } @@ -156,14 +172,27 @@ if (filtersByColumnValue()) { byte[] filterValue = equalsMap.get(colKey); if (null != filterValue) { - return !Arrays.equals(filterValue, data); + boolean result = !Arrays.equals(filterValue, data); + if (LOG.isDebugEnabled()) { + LOG.debug("filter returning " + result + " for rowKey: " + rowKey + + " colKey: " + colKey); + } + return result; } } if (nullColumns.contains(colKey)) { if (data != null && !Arrays.equals(HConstants.DELETE_BYTES.get(), data)) { + if (LOG.isDebugEnabled()) { + LOG.debug("filter returning true for rowKey: " + rowKey + + " colKey: " + colKey); + } return true; } } + if (LOG.isDebugEnabled()) { + LOG.debug("filter returning false for rowKey: " + rowKey + " colKey: " + + colKey); + } return false; } @@ -175,14 +204,25 @@ for (Entry col : columns.entrySet()) { if (nullColumns.contains(col.getKey()) && !Arrays.equals(HConstants.DELETE_BYTES.get(), col.getValue())) { + if (LOG.isDebugEnabled()) { + LOG.debug("filterNotNull returning true for colKey: " + col.getKey() + + ", column should be null."); + } return true; } } for (Text col : equalsMap.keySet()) { if (!columns.containsKey(col)) { + if (LOG.isDebugEnabled()) { + LOG.debug("filterNotNull returning true for colKey: " + col + + ", column not found in given TreeMap."); + } return true; } } + if (LOG.isDebugEnabled()) { + LOG.debug("filterNotNull returning false."); + } return false; } @@ -215,7 +255,7 @@ public void readFields(final DataInput in) throws IOException { boolean hasRowKeyPattern = in.readBoolean(); if (hasRowKeyPattern) { - rowKeyRegExp = in.readLine(); + rowKeyRegExp = in.readUTF(); } // equals map equalsMap.clear(); @@ -283,7 +323,7 @@ out.writeBoolean(false); } else { out.writeBoolean(true); - out.writeChars(getRowKeyRegExp()); + out.writeUTF(getRowKeyRegExp()); } // equalsMap Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java =================================================================== --- src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java (revision 557761) +++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterSet.java (working copy) @@ -26,23 +26,27 @@ import java.util.Set; import java.util.TreeMap; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.Text; /** * Implementation of RowFilterInterface that represents a set of RowFilters - * which will be evaluated with a specified boolean operator AND/OR. Since you - * can use RowFilterSets as children of RowFilterSet, you can create a - * hierarchy of filters to be evaluated. + * which will be evaluated with a specified boolean operator MUST_PASS_ALL + * (!AND) or MUST_PASS_ONE (!OR). Since you can use RowFilterSets as children + * of RowFilterSet, you can create a hierarchy of filters to be evaluated. 
*/ public class RowFilterSet implements RowFilterInterface { - enum Operator { - AND, OR + public static enum Operator { + MUST_PASS_ALL, MUST_PASS_ONE } - private Operator operator = Operator.AND; + private Operator operator = Operator.MUST_PASS_ALL; private Set filters = new HashSet(); + static final Log LOG = LogFactory.getLog(RowFilterSet.class); + /** * Default constructor, filters nothing. Required though for RPC * deserialization. @@ -52,8 +56,8 @@ } /** - * Constructor that takes a set of RowFilters. The default operator AND is - * assumed. + * Constructor that takes a set of RowFilters. The default operator + * MUST_PASS_ALL is assumed. * * @param rowFilters */ @@ -80,6 +84,10 @@ public void validate(final Text[] columns) { for (RowFilterInterface filter : filters) { filter.validate(columns); + if (LOG.isDebugEnabled()) { + LOG.debug("Validated subfilter of type " + + filter.getClass().getSimpleName()); + } } } @@ -90,6 +98,10 @@ public void reset() { for (RowFilterInterface filter : filters) { filter.reset(); + if (LOG.isDebugEnabled()) { + LOG.debug("Reset subfilter of type " + + filter.getClass().getSimpleName()); + } } } @@ -97,9 +109,13 @@ * * {@inheritDoc} */ - public void acceptedRow(final Text key) { + public void rowProcessed(boolean filtered, Text rowKey) { for (RowFilterInterface filter : filters) { - filter.acceptedRow(key); + filter.rowProcessed(filtered, rowKey); + if (LOG.isDebugEnabled()) { + LOG.debug("Called rowProcessed on subfilter of type " + + filter.getClass().getSimpleName()); + } } } @@ -107,19 +123,47 @@ * * {@inheritDoc} */ + public boolean processAlways() { + for (RowFilterInterface filter : filters) { + if (filter.processAlways()) { + if (LOG.isDebugEnabled()) { + LOG.debug("processAlways() is true due to subfilter of type " + + filter.getClass().getSimpleName()); + } + return true; + } + } + return false; + } + + /** + * + * {@inheritDoc} + */ public boolean filterAllRemaining() { - boolean result = operator == Operator.OR; + boolean result = operator == Operator.MUST_PASS_ONE; for (RowFilterInterface filter : filters) { - if (operator == Operator.AND) { + if (operator == Operator.MUST_PASS_ALL) { if (filter.filterAllRemaining()) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPALL filterAllRemaining returning true due" + + " to subfilter of type " + filter.getClass().getSimpleName()); + } return true; } - } else if (operator == Operator.OR) { + } else if (operator == Operator.MUST_PASS_ONE) { if (!filter.filterAllRemaining()) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPONE filterAllRemaining returning false due" + + " to subfilter of type " + filter.getClass().getSimpleName()); + } return false; } } } + if (LOG.isDebugEnabled()) { + LOG.debug("filterAllRemaining default returning " + result); + } return result; } @@ -128,40 +172,79 @@ * {@inheritDoc} */ public boolean filter(final Text rowKey) { - boolean result = operator == Operator.OR; + boolean resultFound = false; + boolean result = operator == Operator.MUST_PASS_ONE; for (RowFilterInterface filter : filters) { - if (operator == Operator.AND) { - if (filter.filterAllRemaining() || filter.filter(rowKey)) { - return true; + if (!resultFound) { + if (operator == Operator.MUST_PASS_ALL) { + if (filter.filterAllRemaining() || filter.filter(rowKey)) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPALL filter(Text) will return true due" + + " to subfilter of type " + filter.getClass().getSimpleName()); + } + result = true; + resultFound = true; + } + } else if (operator == 
Operator.MUST_PASS_ONE) { + if (!filter.filterAllRemaining() && !filter.filter(rowKey)) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPONE filter(Text) will return false due" + + " to subfilter of type " + filter.getClass().getSimpleName()); + } + result = false; + resultFound = true; + } } - } else if (operator == Operator.OR) { - if (!filter.filterAllRemaining() && !filter.filter(rowKey)) { - return false; - } + } else if (filter.processAlways()) { + filter.filter(rowKey); } } + if (LOG.isDebugEnabled()) { + LOG.debug("filter(Text) returning " + result); + } return result; - } /** * * {@inheritDoc} */ - public boolean filter(final Text rowKey, final Text colKey, final byte[] data) { - boolean result = operator == Operator.OR; + public boolean filter(final Text rowKey, final Text colKey, + final byte[] data) { + boolean resultFound = false; + boolean result = operator == Operator.MUST_PASS_ONE; for (RowFilterInterface filter : filters) { - if (operator == Operator.AND) { - if (filter.filterAllRemaining() || filter.filter(rowKey, colKey, data)) { - return true; + if (!resultFound) { + if (operator == Operator.MUST_PASS_ALL) { + if (filter.filterAllRemaining() || + filter.filter(rowKey, colKey, data)) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPALL filter(Text, Text, byte[]) will" + + " return true due to subfilter of type " + + filter.getClass().getSimpleName()); + } + result = true; + resultFound = true; + } + } else if (operator == Operator.MUST_PASS_ONE) { + if (!filter.filterAllRemaining() && + !filter.filter(rowKey, colKey, data)) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPONE filter(Text, Text, byte[]) will" + + " return false due to subfilter of type " + + filter.getClass().getSimpleName()); + } + result = false; + resultFound = true; + } } - } else if (operator == Operator.OR) { - if (!filter.filterAllRemaining() - && !filter.filter(rowKey, colKey, data)) { - return false; - } + } else if (filter.processAlways()) { + filter.filter(rowKey, colKey, data); } } + if (LOG.isDebugEnabled()) { + LOG.debug("filter(Text, Text, byte[]) returning " + result); + } return result; } @@ -170,18 +253,36 @@ * {@inheritDoc} */ public boolean filterNotNull(final TreeMap columns) { - boolean result = operator == Operator.OR; + boolean resultFound = false; + boolean result = operator == Operator.MUST_PASS_ONE; for (RowFilterInterface filter : filters) { - if (operator == Operator.AND) { - if (filter.filterAllRemaining() || filter.filterNotNull(columns)) { - return true; + if (!resultFound) { + if (operator == Operator.MUST_PASS_ALL) { + if (filter.filterAllRemaining() || filter.filterNotNull(columns)) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPALL filterNotNull will return true due" + + " to subfilter of type " + filter.getClass().getSimpleName()); + } + result = true; + resultFound = true; + } + } else if (operator == Operator.MUST_PASS_ONE) { + if (!filter.filterAllRemaining() && !filter.filterNotNull(columns)) { + if (LOG.isDebugEnabled()) { + LOG.debug("op.MPONE filterNotNull will return false due" + + " to subfilter of type " + filter.getClass().getSimpleName()); + } + result = false; + resultFound = true; + } } - } else if (operator == Operator.OR) { - if (!filter.filterAllRemaining() && !filter.filterNotNull(columns)) { - return false; - } + } else if (filter.processAlways()) { + filter.filterNotNull(columns); } } + if (LOG.isDebugEnabled()) { + LOG.debug("filterNotNull returning " + result); + } return result; } @@ -203,6 +304,10 @@ filter = (RowFilterInterface) 
clazz.newInstance();
        filter.readFields(in);
        filters.add(filter);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Successfully read in subfilter of type " +
+            filter.getClass().getSimpleName());
+        }
      }
    } catch (InstantiationException e) {
      throw new RuntimeException("Failed to deserialize RowFilterInterface.",
Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java
===================================================================
--- src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java (revision 557761)
+++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/PageRowFilter.java (working copy)
@@ -24,6 +24,8 @@
 import java.io.IOException;
 import java.util.TreeMap;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -44,6 +46,8 @@
   private long pageSize = Long.MAX_VALUE;
   private int rowsAccepted = 0;
 
+  static final Log LOG = LogFactory.getLog(PageRowFilter.class);
+
   /**
    * Default constructor, filters nothing. Required though for RPC
    * deserialization.
@@ -81,19 +85,35 @@
    *
    * {@inheritDoc}
    */
-  public void acceptedRow(@SuppressWarnings("unused") final Text key) {
-    rowsAccepted++;
+  public void rowProcessed(boolean filtered, Text rowKey) {
+    if (!filtered) {
+      this.rowsAccepted++;
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("rowProcessed incremented rowsAccepted to " +
+          this.rowsAccepted);
+      }
+    }
   }
 
   /**
    *
    * {@inheritDoc}
    */
+  public boolean processAlways() {
+    return false;
+  }
+
+  /**
+   *
+   * {@inheritDoc}
+   */
   public boolean filterAllRemaining() {
-    if (this.rowsAccepted > this.pageSize) {
-      return true;
+    boolean result = this.rowsAccepted > this.pageSize;
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("filtering decision is " + result + " with rowsAccepted: " +
+        this.rowsAccepted);
     }
-    return false;
+    return result;
   }
 
   /**
Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java
===================================================================
--- src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java (revision 557761)
+++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/filter/RowFilterInterface.java (working copy)
@@ -21,7 +21,6 @@
 
 import java.util.TreeMap;
 
-import org.apache.hadoop.hbase.HRegion;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
@@ -39,18 +38,31 @@
   void reset();
 
   /**
-   * Called to let filter know that the specified row has been included in the
-   * results (passed all filtering). With out HScanner calling this, the filter
-   * does not know if a row passed filtering even if it passed the row itself
-   * because other filters may have failed the row. E.g. when this filter is a
-   * member of a RowFilterSet with an OR operator.
+   * Called to let filter know the final decision (to pass or filter) on a
+   * given row. Without HScanner calling this, the filter does not know if a
+   * row passed filtering even if it passed the row itself because other
+   * filters may have failed the row. E.g. when this filter is a member of a
+   * RowFilterSet with a MUST_PASS_ONE (OR) operator.
    *
    * @see RowFilterSet
-   * @param key
+   * @param filtered
+   * @param rowKey
    */
-  void acceptedRow(final Text key);
+  void rowProcessed(boolean filtered, Text rowKey);
 
   /**
+   * Returns whether or not the filter should always be processed in any
+   * filtering call. This precaution is necessary for filters that maintain
+   * state and need to be updated according to their response to filtering
+   * calls (see WhileMatchRowFilter for an example). At times, filters nested
+   * in RowFilterSets may or may not be called because the RowFilterSet
+   * determines a result as fast as possible. Returning true for
+   * processAlways() ensures that the filter will always be called.
+   *
+   * @return whether or not to always process the filter
+   */
+  boolean processAlways();
+
+  /**
    * Determines if the filter has decided that all remaining results should be
    * filtered (skipped). This is used to prevent the scanner from scanning
    * the rest of the HRegion when for sure the filter will exclude all
@@ -82,14 +94,16 @@
   boolean filter(final Text rowKey, final Text colKey, final byte[] data);
 
   /**
-   * Filters row if given columns are non-null and have null criteria or if
-   * there exists criteria on columns not included in the column set. A column
-   * is considered null if it:
-   * <ul>
-   * <li>Is not included in the given columns.</li>
-   * <li>Has a value of HConstants.DELETE_BYTES</li>
-   * </ul>
-   *
+ * Filters a row if: + * 1) The given row (@param columns) has a columnKey expected to be null AND + * the value associated with that columnKey is non-null. + * 2) The filter has a criterion for a particular columnKey, but that + * columnKey is not in the given row (@param columns). * + * Note that filterNotNull does not care whether the values associated with a + * columnKey match. Also note that a "null value" associated with a columnKey + * is expressed as HConstants.DELETE_BYTES. + * * @param columns * @return true if null/non-null criteria not met. */ Index: src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java =================================================================== --- src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (revision 557761) +++ src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java (working copy) @@ -1485,8 +1485,8 @@ moreToFollow = chosenTimestamp > 0; if (dataFilter != null) { - if (moreToFollow && !filtered) { - dataFilter.acceptedRow(chosenRow); + if (moreToFollow) { + dataFilter.rowProcessed(filtered, chosenRow); } if (dataFilter.filterAllRemaining()) { moreToFollow = false;
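
Reviewer note, not part of the patch: the three testSerialization methods above repeat the same write()/readFields() round-trip. If the pattern keeps spreading, it could be factored into a shared helper. Below is a minimal sketch, assuming only the org.apache.hadoop.io.Writable contract that RowFilterInterface extends; the WritableRoundTrip class and roundTrip method are hypothetical names, not in the patch:

  import java.io.ByteArrayInputStream;
  import java.io.ByteArrayOutputStream;
  import java.io.DataInputStream;
  import java.io.DataOutputStream;
  import java.io.IOException;

  import org.apache.hadoop.io.Writable;

  public class WritableRoundTrip {
    /**
     * Decomposes src to bytes, then recomposes those bytes into dst.
     * Returns dst for convenience.
     */
    public static <T extends Writable> T roundTrip(T src, T dst)
        throws IOException {
      // Decompose src to bytes.
      ByteArrayOutputStream stream = new ByteArrayOutputStream();
      DataOutputStream out = new DataOutputStream(stream);
      src.write(out);
      out.close();
      // Recompose the bytes into dst.
      DataInputStream in =
        new DataInputStream(new ByteArrayInputStream(stream.toByteArray()));
      dst.readFields(in);
      return dst;
    }
  }

Each test would then reduce to something like: RowFilterInterface newFilter = WritableRoundTrip.roundTrip(mainFilter, new RegExpRowFilter());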
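Also for reviewers: the calling convention behind the acceptedRow() to rowProcessed() rename is visible only as fragments in the HRegion.java hunk above. A sketch of that contract as one loop, under simplifying assumptions (row-level filtering only; the real scanner also applies the filter(rowKey, colKey, data) overload and its own moreToFollow bookkeeping); the scan method, rows source, and emit placeholder are hypothetical:

  package org.apache.hadoop.hbase.filter;

  import org.apache.hadoop.io.Text;

  public class FilterContractSketch {
    /** Drives a filter the way HScanner is expected to. */
    static void scan(Iterable<Text> rows, RowFilterInterface filter) {
      for (Text row : rows) {
        boolean filtered = filter.filter(row);
        // Report the final decision so stateful filters (PageRowFilter,
        // WhileMatchRowFilter, members of a RowFilterSet) stay in sync.
        filter.rowProcessed(filtered, row);
        if (!filtered) {
          // emit(row) -- hypothetical consumer of passing rows.
        }
        // Stop scanning once the filter commits to excluding the rest.
        if (filter.filterAllRemaining()) {
          break;
        }
      }
    }
  }

processAlways() is deliberately absent from this loop: it is consulted inside RowFilterSet, which keeps invoking subfilters that return true from it even after the set's verdict for the row is already known.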