diff --git c/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java i/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java new file mode 100644 index 0000000..c6eb75b --- /dev/null +++ i/src/main/java/org/apache/hadoop/hbase/filter/ColumnRangeFilter.java @@ -0,0 +1,172 @@ +/* + * Copyright 2010 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.filter; + +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.util.Bytes; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.DataInput; + +/** + * This filter is used for selecting only those keys with columns that are + * between minColumn to maxColumn. For example, if minColumn is 'an', and + * maxColumn is 'be', it will pass keys with columns like 'ana', 'bad', but not + * keys with columns like 'bed', 'eye' + * + * If minColumn is null, there is no lower bound. If maxColumn is null, there is + * no upper bound. + * + * minColumnInclusive and maxColumnInclusive specify if the ranges are inclusive + * or not. + */ +public class ColumnRangeFilter extends FilterBase { + protected byte[] minColumn = null; + protected boolean minColumnInclusive = true; + protected byte[] maxColumn = null; + protected boolean maxColumnInclusive = false; + + public ColumnRangeFilter() { + super(); + } + /** + * Create a filter to select those keys with columns that are between minColumn + * and maxColumn. + * @param minColumn minimum value for the column range. If if it's null, + * there is no lower bound. + * @param minColumnInclusive if true, include minColumn in the range. + * @param maxColumn maximum value for the column range. If it's null, + * @param maxColumnInclusive if true, include maxColumn in the range. + * there is no upper bound. + */ + public ColumnRangeFilter(final byte[] minColumn, boolean minColumnInclusive, + final byte[] maxColumn, boolean maxColumnInclusive) { + this.minColumn = minColumn; + this.minColumnInclusive = minColumnInclusive; + this.maxColumn = maxColumn; + this.maxColumnInclusive = maxColumnInclusive; + } + + /** + * @return if min column range is inclusive. + */ + public boolean isMinColumnInclusive() { + return minColumnInclusive; + } + + /** + * @return if max column range is inclusive. + */ + public boolean isMaxColumnInclusive() { + return maxColumnInclusive; + } + + /** + * @return the min column range for the filter + */ + public byte[] getMinColumn() { + return this.minColumn; + } + + /** + * @return the max column range for the filter + */ + public byte[] getMaxColumn() { + return this.maxColumn; + } + + @Override + public ReturnCode filterKeyValue(KeyValue kv) { + byte[] buffer = kv.getBuffer(); + int qualifierOffset = kv.getQualifierOffset(); + int qualifierLength = kv.getQualifierLength(); + int cmpMin = 1; + + if (this.minColumn != null) { + cmpMin = Bytes.compareTo(buffer, qualifierOffset, qualifierLength, + this.minColumn, 0, this.minColumn.length); + } + + if (cmpMin < 0) { + return ReturnCode.SEEK_NEXT_USING_HINT; + } + + if (!this.minColumnInclusive && cmpMin == 0) { + return ReturnCode.SKIP; + } + + if (this.maxColumn == null) { + return ReturnCode.INCLUDE; + } + + int cmpMax = Bytes.compareTo(buffer, qualifierOffset, qualifierLength, + this.maxColumn, 0, this.maxColumn.length); + + if (this.maxColumnInclusive && cmpMax <= 0 || + !this.maxColumnInclusive && cmpMax < 0) { + return ReturnCode.INCLUDE; + } + + return ReturnCode.NEXT_ROW; + } + + @Override + public void write(DataOutput out) throws IOException { + // need to write out a flag for null value separately. Otherwise, + // we will not be able to differentiate empty string and null + out.writeBoolean(this.minColumn == null); + Bytes.writeByteArray(out, this.minColumn); + out.writeBoolean(this.minColumnInclusive); + + out.writeBoolean(this.maxColumn == null); + Bytes.writeByteArray(out, this.maxColumn); + out.writeBoolean(this.maxColumnInclusive); + } + + @Override + public void readFields(DataInput in) throws IOException { + boolean isMinColumnNull = in.readBoolean(); + this.minColumn = Bytes.readByteArray(in); + + if (isMinColumnNull) { + this.minColumn = null; + } + + this.minColumnInclusive = in.readBoolean(); + + boolean isMaxColumnNull = in.readBoolean(); + this.maxColumn = Bytes.readByteArray(in); + if (isMaxColumnNull) { + this.maxColumn = null; + } + this.maxColumnInclusive = in.readBoolean(); + } + + + @Override + public KeyValue getNextKeyHint(KeyValue kv) { + return KeyValue.createFirstOnRow(kv.getBuffer(), kv.getRowOffset(), kv + .getRowLength(), kv.getBuffer(), kv.getFamilyOffset(), kv + .getFamilyLength(), this.minColumn, 0, this.minColumn == null ? 0 + : this.minColumn.length); + } +} diff --git c/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java i/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java index f397f41..d8f8463 100644 --- c/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java +++ i/src/main/java/org/apache/hadoop/hbase/io/HbaseObjectWritable.java @@ -64,6 +64,7 @@ import org.apache.hadoop.hbase.filter.BinaryComparator; import org.apache.hadoop.hbase.filter.BitComparator; import org.apache.hadoop.hbase.filter.ColumnCountGetFilter; import org.apache.hadoop.hbase.filter.ColumnPrefixFilter; +import org.apache.hadoop.hbase.filter.ColumnRangeFilter; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; import org.apache.hadoop.hbase.filter.DependentColumnFilter; @@ -222,6 +223,8 @@ public class HbaseObjectWritable implements Writable, WritableWithSize, Configur addToMap(RandomRowFilter.class, code++); addToMap(CompareOp.class, code++); + + addToMap(ColumnRangeFilter.class, code++); } private Class declaredClass; diff --git c/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java i/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java new file mode 100644 index 0000000..b1c2ed1 --- /dev/null +++ i/src/test/java/org/apache/hadoop/hbase/filter/TestColumnRangeFilter.java @@ -0,0 +1,242 @@ +package org.apache.hadoop.hbase.filter; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.client.HTable; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.InternalScanner; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.Test; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; + +class StringRange { + private String start = null; + private String end = null; + private boolean startInclusive = true; + private boolean endInclusive = false; + + public StringRange(String start, boolean startInclusive, String end, + boolean endInclusive) { + this.start = start; + this.startInclusive = startInclusive; + this.end = end; + this.endInclusive = endInclusive; + } + + public String getStart() { + return this.start; + } + + public String getEnd() { + return this.end; + } + + public boolean isStartInclusive() { + return this.startInclusive; + } + + public boolean isEndInclusive() { + return this.endInclusive; + } + + @Override + public int hashCode() { + int hashCode = 0; + if (this.start != null) { + hashCode ^= this.start.hashCode(); + } + + if (this.end != null) { + hashCode ^= this.end.hashCode(); + } + return hashCode; + } + + @Override + public String toString() { + String result = (this.startInclusive ? "[" : "(") + + (this.start == null ? null : this.start) + ", " + + (this.end == null ? null : this.end) + + (this.endInclusive ? "]" : ")"); + return result; + } + + public boolean inRange(String value) { + boolean afterStart = true; + if (this.start != null) { + int startCmp = value.compareTo(this.start); + afterStart = this.startInclusive ? startCmp >= 0 : startCmp > 0; + } + + boolean beforeEnd = true; + if (this.end != null) { + int endCmp = value.compareTo(this.end); + beforeEnd = this.endInclusive ? endCmp <= 0 : endCmp < 0; + } + + return afterStart && beforeEnd; + } +} + +public class TestColumnRangeFilter { + + private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); + + private final Log LOG = LogFactory.getLog(this.getClass()); + + /** + * @throws java.lang.Exception + */ + @BeforeClass + public static void setUpBeforeClass() throws Exception { + TEST_UTIL.startMiniCluster(3); + } + + /** + * @throws java.lang.Exception + */ + @AfterClass + public static void tearDownAfterClass() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + /** + * @throws java.lang.Exception + */ + @Before + public void setUp() throws Exception { + // Nothing to do. + } + + /** + * @throws java.lang.Exception + */ + @After + public void tearDown() throws Exception { + // Nothing to do. + } + + @Test + public void TestColumnRangeFilterClient() throws Exception { + String family = "Family"; + String table = "TestColumnRangeFilterClient"; + HTable ht = TEST_UTIL.createTable(Bytes.toBytes(table), + Bytes.toBytes(family), Integer.MAX_VALUE); + + List rows = generateRandomWords(10, 8); + long maxTimestamp = 2; + List columns = generateRandomWords(20000, 8); + + List kvList = new ArrayList(); + + Map> rangeMap = new HashMap>(); + + rangeMap.put(new StringRange(null, true, "b", false), + new ArrayList()); + rangeMap.put(new StringRange("p", true, "q", false), + new ArrayList()); + rangeMap.put(new StringRange("r", false, "s", true), + new ArrayList()); + rangeMap.put(new StringRange("z", false, null, false), + new ArrayList()); + String valueString = "ValueString"; + + for (String row : rows) { + Put p = new Put(Bytes.toBytes(row)); + for (String column : columns) { + for (long timestamp = 1; timestamp <= maxTimestamp; timestamp++) { + KeyValue kv = KeyValueTestUtil.create(row, family, column, timestamp, + valueString); + p.add(kv); + kvList.add(kv); + for (StringRange s : rangeMap.keySet()) { + if (s.inRange(column)) { + rangeMap.get(s).add(kv); + } + } + } + } + ht.put(p); + } + + TEST_UTIL.flush(); + try { + Thread.sleep(3000); + } catch (InterruptedException i) { + // ignore + } + + ColumnRangeFilter filter; + Scan scan = new Scan(); + scan.setMaxVersions(); + for (StringRange s : rangeMap.keySet()) { + filter = new ColumnRangeFilter(s.getStart() == null ? null + : Bytes.toBytes(s.getStart()), s.isStartInclusive(), + s.getEnd() == null ? null : Bytes.toBytes(s.getEnd()), + s.isEndInclusive()); + scan.setFilter(filter); + ResultScanner scanner = ht.getScanner(scan); + List results = new ArrayList(); + LOG.info("scan column range: " + s.toString()); + long timeBeforeScan = System.currentTimeMillis(); + + Result result; + while ((result = scanner.next()) != null) { + for (KeyValue kv : result.list()) { + results.add(kv); + } + } + long scanTime = System.currentTimeMillis() - timeBeforeScan; + LOG.info("scan time = " + scanTime + "ms"); + LOG.info("found " + results.size() + " results"); + LOG.info("Expecting " + rangeMap.get(s).size() + " results"); + + /* + for (KeyValue kv : results) { + LOG.info("found row " + Bytes.toString(kv.getRow()) + ", column " + + Bytes.toString(kv.getQualifier())); + } + */ + + assertEquals(rangeMap.get(s).size(), results.size()); + } + } + + List generateRandomWords(int numberOfWords, int maxLengthOfWords) { + Set wordSet = new HashSet(); + for (int i = 0; i < numberOfWords; i++) { + int lengthOfWords = (int) (Math.random() * maxLengthOfWords) + 1; + char[] wordChar = new char[lengthOfWords]; + for (int j = 0; j < wordChar.length; j++) { + wordChar[j] = (char) (Math.random() * 26 + 97); + } + String word = new String(wordChar); + wordSet.add(word); + } + List wordList = new ArrayList(wordSet); + return wordList; + } +}