Index: src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java =================================================================== --- src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java (revision 718387) +++ src/java/org/apache/hadoop/hbase/filter/ColumnValueFilter.java (working copy) @@ -53,11 +53,7 @@ /** greater than or equal to */ GREATER_OR_EQUAL, /** greater than */ - GREATER, - // Below are more specific operators. - /** sub-string. Case insensitive. */ - SUB_STRING; - + GREATER; } private byte[] columnName; @@ -143,12 +139,6 @@ } private boolean filterColumnValue(final byte [] data) { - // Special case for Substring operator - if (compareOp == CompareOp.SUB_STRING) { - return !Bytes.toString(data).toLowerCase().contains( - (Bytes.toString(value)).toLowerCase()); - } - int compareResult; if (comparator != null) { compareResult = comparator.compareTo(data); Index: src/java/org/apache/hadoop/hbase/filter/SubstringComparator.java =================================================================== --- src/java/org/apache/hadoop/hbase/filter/SubstringComparator.java (revision 0) +++ src/java/org/apache/hadoop/hbase/filter/SubstringComparator.java (revision 0) @@ -0,0 +1,74 @@ +/** + * Copyright 2008 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * This comparator is for use with ColumnValueFilter, for filtering based on
+ * the value of a given column. Use it to test if a given substring appears
+ * in a cell value in the column. The comparison is case insensitive.
+ * <p>
+ * Only EQUAL or NOT_EQUAL tests are valid with this comparator.
+ * <p>
+ * For example:
+ * <p>
+ * <pre>
+ * ColumnValueFilter cvf =
+ *   new ColumnValueFilter("col", ColumnValueFilter.CompareOp.EQUAL,
+ *     new SubstringComparator("substr"));
+ * </pre>
+ */
+public class SubstringComparator implements WritableByteArrayComparable {
+
+  private String substr; // needle, held lower-cased so matching ignores case
+
+  /** Nullary constructor for Writable; substr stays null until readFields runs */
+  public SubstringComparator() {
+  }
+
+  /**
+   * Builds a comparator that searches cell values for the given text.
+   * @param substr the substring; lower-cased here using the root locale
+   */
+  public SubstringComparator(String substr) {
+    this.substr = substr.toLowerCase(java.util.Locale.ROOT);
+  }
+
+  @Override
+  public int compareTo(byte[] value) { // 0 if value contains substr, else 1
+    return Bytes.toString(value).toLowerCase(java.util.Locale.ROOT).contains(substr) ? 0 : 1;
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    substr = in.readUTF(); // stored as read; no case folding is applied here
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeUTF(substr);
+  }
+
+}
Index: src/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java
===================================================================
--- src/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java (revision 0)
+++ src/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java (revision 0)
@@ -0,0 +1,85 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * This comparator is for use with ColumnValueFilter, for filtering based on
+ * the value of a given column. Use it to test if a given regular expression
+ * matches a cell value in the column.
+ * <p>
+ * Only EQUAL or NOT_EQUAL tests are valid with this comparator.
+ * <p>
+ * For example:
+ * <p>
+ * <pre>
+ * ColumnValueFilter cvf =
+ *   new ColumnValueFilter("col",
+ *     ColumnValueFilter.CompareOp.EQUAL,
+ *     new RegexStringComparator(
+ *       // v4 IP address
+ *       "(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3,3}" +
+ *         "(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))(\\/[0-9]+)?" +
+ *       "|" +
+ *       // v6 IP address
+ *       "((([\\dA-Fa-f]{1,4}:){7}[\\dA-Fa-f]{1,4})(:([\\d]{1,3}.)" +
+ *         "{3}[\\d]{1,3})?)(\\/[0-9]+)?"));
+ * </pre>
+ */
+public class RegexStringComparator implements WritableByteArrayComparable {
+
+  private Pattern pattern;
+
+  /** No-argument constructor for Writable; pattern is unset until readFields */
+  public RegexStringComparator() {
+  }
+
+  /**
+   * Creates a comparator backed by the compiled form of the expression.
+   * @param expr a valid regular expression
+   */
+  public RegexStringComparator(String expr) {
+    pattern = Pattern.compile(expr);
+  }
+
+  @Override
+  public int compareTo(byte[] value) {
+    // find() accepts a match anywhere in the text, whereas matches() needs
+    // the whole text to match; find() is the less surprising of the two.
+    return !pattern.matcher(Bytes.toString(value)).find() ? 1 : 0;
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    pattern = Pattern.compile(in.readUTF());
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    out.writeUTF(pattern.pattern());
+  }
+
+}