From 1cf6133920af4156100508d21447b4874773b38b Mon Sep 17 00:00:00 2001
From: Udai Bhan Kashyap
Date: Sat, 5 Oct 2019 22:59:43 -0400
Subject: [PATCH] HBASE-22969:A new binary component comparator(BinaryComponentComparator) to perform comparison of arbitrary length and position

---
Review notes (this section is ignored by git am):
 - BinaryComponentComparator: equals() compares the component bytes with
   Bytes.equals() instead of compareTo(), hashCode() is derived from value and
   offset, areSerializedFieldsEqual() also checks offset, offset is final.
 - TestFiltersWithBinaryComponentComparator: generateRows() now puts the rows,
   the value tests carry @Test, every scan must return at least one cell, and
   Table/ResultScanner are closed with try-with-resources.
 - Indentation was restored to two-space style; the index lines still carry
   the blob ids of the pre-review patch.

 .../filter/BinaryComponentComparator.java     | 134 +++++++++
 .../hadoop/hbase/filter/TestComparators.java  |  53 ++++
 .../src/main/protobuf/Comparator.proto        |   5 +
 .../src/main/protobuf/Comparator.proto        |   5 +
 ...tFiltersWithBinaryComponentComparator.java | 263 ++++++++++++++++++
 5 files changed, 460 insertions(+)
 create mode 100644 hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java
 create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java
new file mode 100644
index 0000000000..1b94d53563
--- /dev/null
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/BinaryComponentComparator.java
@@ -0,0 +1,134 @@
+/*
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.filter;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.InvalidProtocolBufferException;
+import org.apache.hadoop.hbase.exceptions.DeserializationException;
+import org.apache.hadoop.hbase.protobuf.generated.ComparatorProtos;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.yetus.audience.InterfaceAudience;
+
+/**
+ * A comparator which compares against a specified byte array, but only
+ * compares a specific portion (component) of the byte array. For the rest it is
+ * similar to {@link BinaryComparator}.
+ * <p>
+ * The component starts {@code offset} bytes into the compared range and is as
+ * long as this comparator's own value. The {@code length} argument of
+ * {@link #compareTo(byte[], int, int)} is not consulted, so callers should make
+ * sure the compared array holds at least that many bytes past the component start.
+ */
+@InterfaceAudience.Public
+@SuppressWarnings("ComparableType")
+public class BinaryComponentComparator extends ByteArrayComparable {
+  private final int offset; //offset of component from beginning.
+
+  /**
+   * Constructor
+   *
+   * @param value  value of the component
+   * @param offset offset of the component from beginning
+   */
+  public BinaryComponentComparator(byte[] value, int offset) {
+    super(value);
+    this.offset = offset;
+  }
+
+  @Override
+  public int compareTo(byte[] value) {
+    return compareTo(value, 0, value.length);
+  }
+
+  @Override
+  public int compareTo(byte[] value, int offset, int length) {
+    return Bytes.compareTo(this.value, 0, this.value.length, value, offset + this.offset,
+        this.value.length);
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof BinaryComponentComparator)) {
+      return false;
+    }
+    BinaryComponentComparator bcc = (BinaryComponentComparator) other;
+    // Compare the component bytes directly. compareTo(bcc.value) would read bcc.value
+    // starting at this.offset, which is wrong for equality and can run past the array.
+    return offset == bcc.offset && Bytes.equals(this.value, bcc.value);
+  }
+
+  @Override
+  public int hashCode() {
+    // Derived from the same fields as equals() so that equal comparators hash alike.
+    int result = Bytes.hashCode(this.value);
+    result = 31 * result + offset;
+    return result;
+  }
+
+  /**
+   * @return The comparator serialized using pb
+   */
+  @Override
+  public byte[] toByteArray() {
+    ComparatorProtos.BinaryComponentComparator.Builder builder =
+        ComparatorProtos.BinaryComponentComparator.newBuilder();
+    builder.setValue(ByteString.copyFrom(this.value));
+    builder.setOffset(this.offset);
+    return builder.build().toByteArray();
+  }
+
+  /**
+   * @param pbBytes A pb serialized {@link BinaryComponentComparator} instance
+   * @return An instance of {@link BinaryComponentComparator} made from {@code pbBytes}
+   * @throws DeserializationException DeserializationException
+   * @see #toByteArray
+   */
+  public static BinaryComponentComparator parseFrom(final byte[] pbBytes)
+      throws DeserializationException {
+    ComparatorProtos.BinaryComponentComparator proto;
+    try {
+      proto = ComparatorProtos.BinaryComponentComparator.parseFrom(pbBytes);
+    } catch (InvalidProtocolBufferException e) {
+      throw new DeserializationException(e);
+    }
+    return new BinaryComponentComparator(proto.getValue().toByteArray(), proto.getOffset());
+  }
+
+  /**
+   * @param other parameter to compare against
+   * @return true if and only if the fields of the comparator that are
+   *         serialized are equal to the corresponding fields in other. Used for testing.
+   */
+  @Override
+  boolean areSerializedFieldsEqual(ByteArrayComparable other) {
+    if (other == this) {
+      return true;
+    }
+    if (!(other instanceof BinaryComponentComparator)) {
+      return false;
+    }
+    // offset is serialized by toByteArray(), so it has to take part in the comparison.
+    return super.areSerializedFieldsEqual(other)
+        && this.offset == ((BinaryComponentComparator) other).offset;
+  }
+}
diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
index 3835948b93..868f3b7fda 100644
--- a/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
+++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/filter/TestComparators.java
@@ -104,6 +104,59 @@ public class TestComparators {
     assertTrue(PrivateCellUtil.qualifierStartsWith(kv, q1));
     assertFalse(PrivateCellUtil.qualifierStartsWith(kv, q2));
     assertFalse(PrivateCellUtil.qualifierStartsWith(kv, Bytes.toBytes("longerthanthequalifier")));
+
+    //Binary component comparisons
+    byte[] val = Bytes.toBytes("abcd");
+    kv = new KeyValue(r0, f, q1, val);
+    buffer = ByteBuffer.wrap(kv.getBuffer());
+    bbCell = new ByteBufferKeyValue(buffer, 0, buffer.remaining());
+
+    //equality check
+    //row comparison
+    //row is "row0"(set by variable r0)
+    //and we are checking for equality to 'o' at position 1
+    //'r' is at position 0.
+    byte[] component = Bytes.toBytes("o");
+    comparable = new BinaryComponentComparator(component, 1);
+    assertEquals(0, PrivateCellUtil.compareRow(bbCell, comparable));
+    assertEquals(0, PrivateCellUtil.compareRow(kv, comparable));
+    //value comparison
+    //value is "abcd"(set by variable val).
+    //and we are checking for equality to 'c' at position 2.
+    //'a' is at position 0.
+    component = Bytes.toBytes("c");
+    comparable = new BinaryComponentComparator(component, 2);
+    assertEquals(0, PrivateCellUtil.compareValue(bbCell, comparable));
+    assertEquals(0, PrivateCellUtil.compareValue(kv, comparable));
+
+    //greater than
+    component = Bytes.toBytes("z");
+    //checking for greater than at position 1.
+    //for both row("row0") and value("abcd")
+    //'z' > 'r'
+    comparable = new BinaryComponentComparator(component, 1);
+    //row comparison
+    assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) > 0);
+    assertTrue(PrivateCellUtil.compareRow(kv, comparable) > 0);
+    //value comparison
+    //'z' > 'a'
+    assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) > 0);
+    assertTrue(PrivateCellUtil.compareValue(kv, comparable) > 0);
+
+    //less than
+    component = Bytes.toBytes("a");
+    //checking for less than at position 1 for row ("row0")
+    comparable = new BinaryComponentComparator(component, 1);
+    //row comparison
+    //'a' < 'r'
+    assertTrue(PrivateCellUtil.compareRow(bbCell, comparable) < 0);
+    assertTrue(PrivateCellUtil.compareRow(kv, comparable) < 0);
+    //value comparison
+    //checking for less than at position 2 for value("abcd")
+    //'a' < 'c'
+    comparable = new BinaryComponentComparator(component, 2);
+    assertTrue(PrivateCellUtil.compareValue(bbCell, comparable) < 0);
+    assertTrue(PrivateCellUtil.compareValue(kv, comparable) < 0);
   }
 
 }
diff --git a/hbase-protocol-shaded/src/main/protobuf/Comparator.proto b/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
index 55253aae5f..6a087d3fa6 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Comparator.proto
@@ -77,3 +77,8 @@ message SubstringComparator {
 message BigDecimalComparator {
   required ByteArrayComparable comparable = 1;
 }
+
+message BinaryComponentComparator {
+  required bytes value = 1;
+  required uint32 offset = 2;
+}
diff --git a/hbase-protocol/src/main/protobuf/Comparator.proto b/hbase-protocol/src/main/protobuf/Comparator.proto
index 878a179ef3..802021f7cc 100644
--- a/hbase-protocol/src/main/protobuf/Comparator.proto
+++ b/hbase-protocol/src/main/protobuf/Comparator.proto
@@ -76,3 +76,8 @@ message SubstringComparator {
 message BigDecimalComparator {
   required ByteArrayComparable comparable = 1;
 }
+
+message BinaryComponentComparator {
+  required bytes value = 1;
+  required uint32 offset = 2;
+}
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java
new file mode 100644
index 0000000000..6b7be523eb
--- /dev/null
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFiltersWithBinaryComponentComparator.java
@@ -0,0 +1,263 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompareOperator;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.ResultScanner;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.TestName;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Exercises {@link RowFilter} and {@link ValueFilter} with
+ * {@link BinaryComponentComparator} against a mini cluster. Row keys are four
+ * 4-byte ints (a, b, c, d) written back to back with {@link Bytes#putInt}.
+ */
+@Category(MediumTests.class)
+public class TestFiltersWithBinaryComponentComparator {
+
+  @ClassRule
+  public static final HBaseClassTestRule CLASS_RULE =
+      HBaseClassTestRule.forClass(TestFiltersWithBinaryComponentComparator.class);
+
+  private final static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final Logger LOG =
+      LoggerFactory.getLogger(TestFiltersWithBinaryComponentComparator.class);
+  private byte[] family = Bytes.toBytes("family");
+  private byte[] qf = Bytes.toBytes("qf");
+  private TableName tableName;
+  private int aOffset = 0;
+  private int bOffset = 4;
+  private int cOffset = 8;
+  private int dOffset = 12;
+
+  @Rule
+  public TestName name = new TestName();
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    TEST_UTIL.startMiniCluster();
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    TEST_UTIL.shutdownMiniCluster();
+  }
+
+  @Test
+  public void testRowFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
+    tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setRowFilters(filterList);
+      Scan scan = createScan(filterList);
+      List<Cell> result = getResults(ht, scan);
+      assertFalse("scan returned no cells", result.isEmpty());
+      for (Cell cell : result) {
+        assertRowMatches(cell);
+      }
+    }
+  }
+
+  @Test
+  public void testValueFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where value has 'y' at position 1
+    tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setValueFilters(filterList);
+      Scan scan = new Scan();
+      scan.setFilter(filterList);
+      List<Cell> result = getResults(ht, scan);
+      assertFalse("scan returned no cells", result.isEmpty());
+      for (Cell cell : result) {
+        assertValueMatches(cell);
+      }
+    }
+  }
+
+  @Test
+  public void testRowAndValueFilterWithBinaryComponentComparator() throws IOException {
+    //SELECT * from table where a=1 and b > 10 and b < 20 and c > 90 and c < 100 and d=1
+    //and value has 'y' at position 1
+    tableName = TableName.valueOf(name.getMethodName());
+    try (Table ht = TEST_UTIL.createTable(tableName, family, Integer.MAX_VALUE)) {
+      generateRows(ht, family, qf);
+      FilterList filterList = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+      setRowFilters(filterList);
+      setValueFilters(filterList);
+      Scan scan = new Scan();
+      scan.setFilter(filterList);
+      List<Cell> result = getResults(ht, scan);
+      assertFalse("scan returned no cells", result.isEmpty());
+      for (Cell cell : result) {
+        assertRowMatches(cell);
+        assertValueMatches(cell);
+      }
+    }
+  }
+
+  /** Asserts that the row key of {@code cell} satisfies the row predicate of the tests. */
+  private void assertRowMatches(Cell cell) {
+    byte[] key = CellUtil.cloneRow(cell);
+    int a = Bytes.readAsInt(key, aOffset, 4);
+    int b = Bytes.readAsInt(key, bOffset, 4);
+    int c = Bytes.readAsInt(key, cOffset, 4);
+    int d = Bytes.readAsInt(key, dOffset, 4);
+    assertTrue(a == 1 &&
+        b > 10 &&
+        b < 20 &&
+        c > 90 &&
+        c < 100 &&
+        d == 1);
+  }
+
+  /** Asserts that the value of {@code cell} has 'y' at position 1. */
+  private void assertValueMatches(Cell cell) {
+    byte[] value = CellUtil.cloneValue(cell);
+    assertTrue(Bytes.toString(value).charAt(1) == 'y');
+  }
+
+  /**
+   * Writes one row per (a, b, c, d) combination; rows with an even c get the
+   * value "abc", all others "xyz".
+   */
+  private void generateRows(Table ht, byte[] family, byte[] qf)
+      throws IOException {
+    for (int a = 1; a < 2; ++a) {
+      for (int b = 9; b < 22; ++b) {
+        for (int c = 89; c < 102; ++c) {
+          for (int d = 1; d < 2; ++d) {
+            byte[] key = new byte[16];
+            Bytes.putInt(key, 0, a);
+            Bytes.putInt(key, 4, b);
+            Bytes.putInt(key, 8, c);
+            Bytes.putInt(key, 12, d);
+            Put row = new Put(key);
+            String value = (c % 2 == 0) ? "abc" : "xyz";
+            row.addColumn(family, qf, Bytes.toBytes(value));
+            // Without this put nothing reaches the table and every scan comes back empty.
+            ht.put(row);
+            // Hex.encodeHex returns a char[], which would log as an array identity string.
+            LOG.info("added row: {} with value '{}'", Hex.encodeHexString(key), value);
+          }
+        }
+      }
+    }
+    TEST_UTIL.flush();
+  }
+
+  /** Adds row filters for: b > 10 and b < 20 and c > 90 and c < 100 and d = 1. */
+  private void setRowFilters(FilterList filterList) {
+    byte[] b10 = Bytes.toBytes(10);
+    Filter b10Filter = new RowFilter(CompareOperator.GREATER,
+        new BinaryComponentComparator(b10, bOffset));
+    filterList.addFilter(b10Filter);
+
+    byte[] b20 = Bytes.toBytes(20);
+    Filter b20Filter = new RowFilter(CompareOperator.LESS,
+        new BinaryComponentComparator(b20, bOffset));
+    filterList.addFilter(b20Filter);
+
+    byte[] c90 = Bytes.toBytes(90);
+    Filter c90Filter = new RowFilter(CompareOperator.GREATER,
+        new BinaryComponentComparator(c90, cOffset));
+    filterList.addFilter(c90Filter);
+
+    byte[] c100 = Bytes.toBytes(100);
+    Filter c100Filter = new RowFilter(CompareOperator.LESS,
+        new BinaryComponentComparator(c100, cOffset));
+    filterList.addFilter(c100Filter);
+
+    byte[] d1 = Bytes.toBytes(1);
+    Filter dFilter = new RowFilter(CompareOperator.EQUAL,
+        new BinaryComponentComparator(d1, dOffset));
+    filterList.addFilter(dFilter);
+  }
+
+  /** Adds a value filter for: value has 'y' at position 1. */
+  private void setValueFilters(FilterList filterList) {
+    int offset = 1;
+    byte[] y = Bytes.toBytes("y");
+    Filter yFilter = new ValueFilter(CompareOperator.EQUAL,
+        new BinaryComponentComparator(y, offset));
+    filterList.addFilter(yFilter);
+  }
+
+  /** Builds a scan bounded to the key range that can contain matching rows. */
+  private Scan createScan(FilterList list) {
+    //build start and end key for scan
+    byte[] startKey = new byte[16]; //key size with four ints
+    Bytes.putInt(startKey, aOffset, 1); //a=1
+    Bytes.putInt(startKey, bOffset, 11); //b=11, takes care of b > 10
+    Bytes.putInt(startKey, cOffset, 91); //c=91,
+    Bytes.putInt(startKey, dOffset, 1); //d=1,
+
+    byte[] endKey = new byte[16];
+    Bytes.putInt(endKey, aOffset, 1); //a=1
+    Bytes.putInt(endKey, bOffset, 20); //b=20, takes care of b < 20
+    Bytes.putInt(endKey, cOffset, 100); //c=100,
+    Bytes.putInt(endKey, dOffset, 1); //d=1,
+
+    //setup scan
+    Scan scan = new Scan().withStartRow(startKey).withStopRow(endKey);
+    scan.setFilter(list);
+    return scan;
+  }
+
+  /** Runs {@code scan} against {@code ht} and collects every returned cell. */
+  private List<Cell> getResults(Table ht, Scan scan) throws IOException {
+    List<Cell> results = new ArrayList<>();
+    try (ResultScanner scanner = ht.getScanner(scan)) {
+      Result r;
+      while ((r = scanner.next()) != null) {
+        results.addAll(r.listCells());
+      }
+    }
+    return results;
+  }
+
+}
-- 
2.22.1