From 71a830a3546bb82e186d5614ca73a466db31b276 Mon Sep 17 00:00:00 2001 From: Xiang Li Date: Tue, 8 Nov 2016 17:50:11 +0800 Subject: [PATCH] HBASE-14882: Provide a Put API that adds the provided family, qualifier, value without copying --- .../java/org/apache/hadoop/hbase/client/Put.java | 8 +- .../org/apache/hadoop/hbase/client/TestPut.java | 42 ++++ .../java/org/apache/hadoop/hbase/HConstants.java | 5 + .../hadoop/hbase/IndividualBytesFieldCell.java | 240 +++++++++++++++++++++ .../hadoop/hbase/TestIndividualBytesFieldCell.java | 119 ++++++++++ 5 files changed, 409 insertions(+), 5 deletions(-) create mode 100644 hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java create mode 100644 hbase-common/src/test/java/org/apache/hadoop/hbase/TestIndividualBytesFieldCell.java diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java index dbaf3a7..9a23c52 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Put.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.IndividualBytesFieldCell; import org.apache.hadoop.hbase.Tag; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; @@ -226,12 +227,9 @@ public class Put extends Mutation implements HeapSize, Comparable { * for usage internal HBase to and for advanced client applications. */ public Put addImmutable(byte [] family, byte [] qualifier, long ts, byte [] value) { - if (ts < 0) { - throw new IllegalArgumentException("Timestamp cannot be negative. 
ts=" + ts); - } List list = getCellList(family); - KeyValue kv = createPutKeyValue(family, qualifier, ts, value); - list.add(kv); + list.add(new IndividualBytesFieldCell(this.row, family, qualifier, ts, KeyValue.Type.Put.getCode(), + 0L /* sequence Id */, value, HConstants.EMPTY_BYTE_ARRAY /* tags */)); familyMap.put(family, list); return this; } diff --git a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestPut.java b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestPut.java index 452f40f..ad7d424 100644 --- a/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestPut.java +++ b/hbase-client/src/test/java/org/apache/hadoop/hbase/client/TestPut.java @@ -22,6 +22,8 @@ package org.apache.hadoop.hbase.client; import org.apache.hadoop.hbase.testclassification.ClientTests; import org.apache.hadoop.hbase.testclassification.SmallTests; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.Cell; + import org.junit.Test; import org.junit.experimental.categories.Category; @@ -61,4 +63,44 @@ public class TestPut { Put putRowIsNotImmutable = new Put(rowKey, 1000L, false); assertTrue(rowKey != putRowIsNotImmutable.getRow()); // A local copy is made } + + // HBASE-14882 + @Test + public void testAddImmutable() { + byte[] row = Bytes.toBytes("immutable-row"); + byte[] family = Bytes.toBytes("immutable-family"); + + byte[] qualifier0 = Bytes.toBytes("immutable-qualifier-0"); + byte[] value0 = Bytes.toBytes("immutable-value-0"); + + byte[] qualifier1 = Bytes.toBytes("immutable-qualifier-1"); + byte[] value1 = Bytes.toBytes("immutable-value-1"); + long ts1 = 5000L; + + Put put = new Put(row, true); // "true" indicates that the input row is immutable + put.addImmutable(family, qualifier0, value0); + put.addImmutable(family, qualifier1, ts1, value1); + + // Verify the cell of family:qualifier0 + Cell cell0 = put.get(family, qualifier0).get(0); + + // Verify no local copy is made for family, qualifier or value + 
assertTrue(cell0.getFamilyArray() == family); + assertTrue(cell0.getQualifierArray() == qualifier0); + assertTrue(cell0.getValueArray() == value0); + + // Verify timestamp + assertTrue(cell0.getTimestamp() == put.getTimeStamp()); + + // Verify the cell of family:qualifier1 + Cell cell1 = put.get(family, qualifier1).get(0); + + // Verify no local copy is made for family, qualifier or value + assertTrue(cell1.getFamilyArray() == family); + assertTrue(cell1.getQualifierArray() == qualifier1); + assertTrue(cell1.getValueArray() == value1); + + // Verify timestamp + assertTrue(cell1.getTimestamp() == ts1); + } } diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 4f8facc..f2b4a55 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -550,6 +550,11 @@ public final class HConstants { public static final int MAX_ROW_LENGTH = Short.MAX_VALUE; /** + * Max length of column family + */ + public static final int MAX_FAMILY_LENGTH = Byte.MAX_VALUE; + + /** * Timestamp to use when we want to refer to the latest cell. * This is the timestamp sent by clients when no timestamp is specified on * commit. diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java new file mode 100644 index 0000000..a1e2e5a --- /dev/null +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/IndividualBytesFieldCell.java @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase; + +import org.apache.hadoop.hbase.classification.InterfaceAudience; + +import org.apache.hadoop.hbase.io.HeapSize; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ClassSize; + +@InterfaceAudience.Private +public class IndividualBytesFieldCell implements Cell, HeapSize, Cloneable, SettableSequenceId, SettableTimestamp { + // The following fields are backed by individual byte arrays + private byte[] row; + private byte[] family; + private byte[] qualifier; + private byte[] value; + + // Other fields + private long timestamp; + private byte type; + private long seqId; + private byte[] tags; // Please note that tags is a byte array, rather than an array of org.apache.hadoop.hbase.Tag + + public IndividualBytesFieldCell(byte[] row, byte[] family, byte[] qualifier, + long timestamp, byte type, long seqId, byte[] value, byte[] tags) { + // Check family + // family can not be null or length = 0, or it causes errors when being put into familyMap(which is a TreeMap). + if (family == null || family.length == 0 || family.length > HConstants.MAX_FAMILY_LENGTH) { + throw new IllegalArgumentException("Family cannot be null. " + + "And its length cannot be 0 or greater than " + HConstants.MAX_FAMILY_LENGTH); + } + + // Check timestamp + if (timestamp < 0) { + throw new IllegalArgumentException("Timestamp cannot be negative. 
ts=" + timestamp); + } + + // No local copy is made, but reference to the input directly + this.row = row; + this.family = family; + this.qualifier = qualifier; + this.value = value; + + // Set others + this.timestamp = timestamp; + this.type = type; + this.seqId = seqId; + this.tags = tags; + } + + /** + * Implement Cell interface + */ + // 1) Row + @Override + public byte[] getRowArray() { + // row can not be null, which is checked by Mutation#checkRow() in the constructor of Put, + // so it is safe to return row without checking. + return row; + } + + @Override + public int getRowOffset() { + return 0; + } + + @Override + public short getRowLength() { + // row can not be null and row.length is checked by Mutation#checkRow() in the constructor of Put, + // so it is safe to call row.length and make the type conversion. + return (short)(row.length); + } + + // 2) Family + @Override + public byte[] getFamilyArray() { + // family can not be null, which is checked in the constructor, + // so it is safe to return family without checking. + return family; + } + + @Override + public int getFamilyOffset() { + return 0; + } + + @Override + public byte getFamilyLength() { + // family can not be null and family.length is checked in the constructor, + // so it is safe to call family.length and make the type conversion. + return (byte)(family.length); + } + + // 3) Qualifier + @Override + public byte[] getQualifierArray() { + // qualifier can be null + return (qualifier == null) ? HConstants.EMPTY_BYTE_ARRAY : qualifier; + } + + @Override + public int getQualifierOffset() { + return 0; + } + + @Override + public int getQualifierLength() { + // qualifier can be null + return (qualifier == null) ? 
0 : qualifier.length; + } + + // 4) Timestamp + @Override + public long getTimestamp() { + return timestamp; + } + + //5) Type + @Override + public byte getTypeByte() { + return type; + } + + //6) Sequence ID + @Override + public long getSequenceId() { + return seqId; + } + + //7) Value + @Override + public byte[] getValueArray() { + // value can be null + return (value == null) ? HConstants.EMPTY_BYTE_ARRAY : value; + } + + @Override + public int getValueOffset() { + return 0; + } + + @Override + public int getValueLength() { + // value can be null + return (value == null) ? 0 : value.length; + } + + // 8) Tags + @Override + public byte[] getTagsArray() { + // tags can be null + return (tags == null) ? HConstants.EMPTY_BYTE_ARRAY : tags; + } + + @Override + public int getTagsOffset() { + return 0; + } + + @Override + public int getTagsLength() { + // tags can be null + return (tags == null) ? 0 : tags.length; + } + + /** + * Implement HeapSize interface + */ + @Override + public long heapSize() { + // The instance of this class + int sumOfInstance = 0; + sumOfInstance += ClassSize.OBJECT; // object header + sumOfInstance += 5 * ClassSize.REFERENCE; // pointers to all byte arrays: row, family, qualifier, value, tags + sumOfInstance += Bytes.SIZEOF_BYTE; // type + sumOfInstance += 2 * Bytes.SIZEOF_LONG; // timestamp and sequence id + + sumOfInstance = ClassSize.align(sumOfInstance); // do alignment(padding) + + // All backing byte arrays + int sumOfBackingByteArrays = 0; + // For each backing byte array, add array header size and array length together, then do alignment(padding). + // As qualifier/value/tags can be null, it is not safe to call qualifier/value/tags.length directly, + // so use getQualifierLength(), getValueLength() and getTagsLength() instead. 
+ sumOfBackingByteArrays += ClassSize.align(ClassSize.ARRAY + getRowLength()); // row + sumOfBackingByteArrays += ClassSize.align(ClassSize.ARRAY + getFamilyLength()); // family + sumOfBackingByteArrays += ClassSize.align(ClassSize.ARRAY + getQualifierLength()); // qualifier + sumOfBackingByteArrays += ClassSize.align(ClassSize.ARRAY + getValueLength()); // value + sumOfBackingByteArrays += ClassSize.align(ClassSize.ARRAY + getTagsLength()); // tags + + // Return the sum of instance and all backing byte arrays + return (sumOfInstance + sumOfBackingByteArrays); + } + + /** + * Implement Cloneable interface + */ + @Override + public Object clone() throws CloneNotSupportedException { + return super.clone(); // only a shallow copy + } + + /** + * Implement SettableSequenceId interface + */ + @Override + public void setSequenceId(long seqId) { + this.seqId = seqId; + } + + /** + * Implement SettableTimestamp interface + */ + @Override + public void setTimestamp(long ts) { + this.timestamp = ts; + } + + @Override + public void setTimestamp(byte[] ts, int tsOffset) { + this.timestamp = Bytes.toLong(ts, tsOffset); + } +} diff --git a/hbase-common/src/test/java/org/apache/hadoop/hbase/TestIndividualBytesFieldCell.java b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestIndividualBytesFieldCell.java new file mode 100644 index 0000000..8ab34f9 --- /dev/null +++ b/hbase-common/src/test/java/org/apache/hadoop/hbase/TestIndividualBytesFieldCell.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase; + +import org.apache.hadoop.hbase.util.Bytes; + +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; + +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertEquals; + +@Category({MiscTests.class, SmallTests.class}) +public class TestIndividualBytesFieldCell { + private static IndividualBytesFieldCell c = null; + + @BeforeClass + public static void testConstructorAndVerify() { + // Immutable inputs + byte[] row = Bytes.toBytes("immutable-row"); + byte[] family = Bytes.toBytes("immutable-family"); + byte[] qualifier = Bytes.toBytes("immutable-qualifier"); + byte[] value = Bytes.toBytes("immutable-value"); + + // Other inputs + long ts = 5000L; + byte type = KeyValue.Type.Put.getCode(); + long seqId = 0L; + byte[] tags = HConstants.EMPTY_BYTE_ARRAY; + + c = new IndividualBytesFieldCell(row, family, qualifier, ts, type, seqId, value, tags); + + // Verify if no local copy is made for row, family, qualifier or value + assertTrue(c.getRowArray() == row); + assertTrue(c.getFamilyArray() == family); + assertTrue(c.getQualifierArray() == qualifier); + assertTrue(c.getValueArray() == value); + + // Verify others + assertEquals(ts, c.getTimestamp()); + assertEquals(type, c.getTypeByte()); + assertEquals(seqId, c.getSequenceId()); + assertEquals(tags, c.getTagsArray()); + } + + // Verify if SettableSequenceId interface 
is implemented + @Test + public void testIfSettableSequenceIdImplemented() { + assertTrue(c instanceof SettableSequenceId); + } + + // Verify if SettableTimestamp interface is implemented + @Test + public void testIfSettableTimestampImplemented() { + assertTrue(c instanceof SettableTimestamp); + } + + // Verify clone() only makes shallow copies for backing byte arrays + @Test + public void testClone() throws CloneNotSupportedException { + IndividualBytesFieldCell cloned = (IndividualBytesFieldCell)c.clone(); + assertTrue(cloned.getRowArray() == c.getRowArray()); + assertTrue(cloned.getFamilyArray() == c.getFamilyArray()); + assertTrue(cloned.getQualifierArray() == c.getQualifierArray()); + assertTrue(cloned.getValueArray() == c.getValueArray()); + } + + // Verify getXXXArray() and getXXXLength() when qualifier/value/tags is null + @Test + public void testNullQualifierValueTags() { + byte[] row = Bytes.toBytes("row"); + byte[] family = Bytes.toBytes("family"); + long ts = 5000L; + byte type = KeyValue.Type.Put.getCode(); + long seqId = 0L; + + // The following fields in IndividualBytesFieldCell can be null + byte[] qualifier = null; + byte[] value = null; + byte[] tags = null; + + Cell c = new IndividualBytesFieldCell(row, family, qualifier, ts, type, seqId, value, tags); + + // getXXXArray() for qualifier, value and tags is supposed to return empty byte array, rather than null + assertEquals(HConstants.EMPTY_BYTE_ARRAY, c.getQualifierArray()); + assertEquals(HConstants.EMPTY_BYTE_ARRAY, c.getValueArray()); + assertEquals(HConstants.EMPTY_BYTE_ARRAY, c.getTagsArray()); + + // getXXXLength() for qualifier, value and tags is supposed to return 0, rather than throw NullPointerException + assertEquals(0, c.getQualifierLength()); + assertEquals(0, c.getValueLength()); + assertEquals(0, c.getTagsLength()); + + // getXXXOffset() for qualifier, value and tags is supposed to return 0 + assertEquals(0, c.getQualifierOffset()); + assertEquals(0, c.getValueOffset()); + 
assertEquals(0, c.getTagsOffset()); + } +} -- 2.9.3 (Apple Git-75)