Index: hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestRowKey.java =================================================================== --- hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestRowKey.java (revision 0) +++ hbase-common/src/test/java/org/apache/hadoop/hbase/util/TestRowKey.java (revision 0) @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations + * under the License. 
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.Assert;
+import org.junit.Test;
+import org.apache.hadoop.hbase.SmallTests;
+import org.junit.experimental.categories.Category;
+
+/**
+ * Unit tests for RowKey instances created from a RowKeySchema.
+ */
+@Category(SmallTests.class)
+public class TestRowKey {
+
+  private int hashVal = 4;
+  private int intVal = 5;
+  private long longVal = 6;
+
+  /**
+   * Fills a three-element key (MD5 hash, int, long) and checks that the backing array has the
+   * total length reported by the schema and that the int and long elements read back unchanged.
+   */
+  @Test
+  public void testCreate() throws Exception {
+    int elements[] = {RowKeySchema.SIZEOF_MD5_HASH, RowKeySchema.SIZEOF_INT, RowKeySchema.SIZEOF_LONG};
+    RowKeySchema schema = new RowKeySchema(elements);
+
+    RowKey rowkey = schema.createRowKey();
+    rowkey.setHash(0, hashVal);
+    rowkey.setInt(1, intVal);
+    rowkey.setLong(2, longVal);
+
+    byte bytes[] = rowkey.getBytes();
+    Assert.assertEquals("key length", schema.getRowKeyLength(), bytes.length);
+
+    // JUnit expects (message, expected, actual); keeping that order makes failure output accurate.
+    Assert.assertEquals("e1", intVal, rowkey.getInt(1));
+    Assert.assertEquals("e2", longVal, rowkey.getLong(2));
+  }
+
+  /**
+   * Setting an MD5 hash on the element that the schema sizes for an int must be rejected.
+   */
+  @Test
+  public void testNegativeCreate() throws Exception {
+    int elements[] = {RowKeySchema.SIZEOF_MD5_HASH, RowKeySchema.SIZEOF_INT, RowKeySchema.SIZEOF_LONG};
+    RowKeySchema schema = new RowKeySchema(elements);
+    RowKey rowkey = schema.createRowKey();
+
+    try {
+      rowkey.setHash(1, intVal); // element 1 is sized for an int, not for an MD5 hash
+    } catch (RuntimeException expected) {
+      // The size mismatch is reported with a RuntimeException. Only that type is caught so that
+      // an unrelated checked failure (e.g. MD5 being unavailable) cannot make this test pass.
+      return;
+    }
+    Assert.fail("Test did not fail!");
+  }
+}
Index: hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowKeySchema.java
===================================================================
--- hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowKeySchema.java (revision 0)
+++ hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowKeySchema.java (revision 0)
@@ -0,0 +1,106 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Utility class used to define the schema of a RowKey.
+ * An instance of RowKeySchema is created by defining an array of integers representing the
+ * lengths of the key elements (i.e., parts) of the rowkey.
+ * + *
+ *  int lengths[] = {RowKeySchema.SIZEOF_MD5_HASH, RowKeySchema.SIZEOF_INT, RowKeySchema.SIZEOF_LONG};
+ *  RowKeySchema schema = new RowKeySchema(lengths);
+ *   
+ *  RowKey rowkey = schema.createRowKey();
+ * 
+ *
+ * In the example above, the rowkey consists of 3 key elements: an MD5 hash, followed by an int, followed by a long. + *
+ * @see org.apache.hadoop.hbase.util.RowKey
+ */
+public class RowKeySchema {
+
+  /** Length in bytes of an MD5 hash key element. */
+  public static final int SIZEOF_MD5_HASH = 16;
+  /** Length in bytes of a byte key element. */
+  public static final int SIZEOF_BYTE = Bytes.SIZEOF_BYTE;
+  /** Length in bytes of an int key element. */
+  public static final int SIZEOF_INT = Bytes.SIZEOF_INT;
+  /** Length in bytes of a long key element. */
+  public static final int SIZEOF_LONG = Bytes.SIZEOF_LONG;
+
+  /** Private copy of the element lengths, in key order. */
+  private final int[] lengths;
+
+  /** Start offset of each element in the rowkey: the sum of the lengths of all earlier elements. */
+  private final int[] startPositions;
+
+  /** Sum of all element lengths. */
+  private final int rowKeyLength;
+
+  /**
+   * Builds a schema from the lengths of its key elements, given in key order.
+   *
+   * @param keyElementLengths length in bytes of each key element. The array is copied, so
+   *          changing it after construction does not affect this schema.
+   * @throws IllegalArgumentException if any element length is negative
+   */
+  public RowKeySchema(int[] keyElementLengths) {
+    // Copy defensively: startPositions and rowKeyLength are derived from these values and
+    // would silently go stale if the caller modified the array later.
+    lengths = keyElementLengths.clone();
+    startPositions = new int[lengths.length];
+
+    int offset = 0;
+    for (int i = 0; i < lengths.length; i++) {
+      if (lengths[i] < 0) {
+        throw new IllegalArgumentException(
+            "Negative length for key element " + i + ": " + lengths[i]);
+      }
+      startPositions[i] = offset;
+      offset += lengths[i];
+    }
+    rowKeyLength = offset;
+  }
+
+  /**
+   * Returns the length of one key element.
+   *
+   * @param keyElement zero-based index of a key element in this schema.
+   * @return length of byte array of the key element specified.
+   * @throws ArrayIndexOutOfBoundsException if keyElement is not a valid element index
+   */
+  public int getElementLength(int keyElement) {
+    return lengths[keyElement];
+  }
+
+  /**
+   * Returns how many key elements this schema defines.
+   *
+   * @return number of key elements (components) in this schema.
+   */
+  public int getNumberOfElements() {
+    return lengths.length;
+  }
+
+  /**
+   * Returns the sum of all element lengths.
+   *
+   * @return total length of the rowkey based on this schema
+   */
+  public int getRowKeyLength() {
+    return rowKeyLength;
+  }
+
+  /**
+   * Creates a new RowKey bound to this schema.
+   *
+   * @return RowKey instance that conforms to this schema
+   */
+  public RowKey createRowKey() {
+    return new RowKey(this);
+  }
+
+  /**
+   * Returns the offset at which a key element begins.
+   *
+   * @param keyElement zero-based index of the specified key element in this schema
+   * @return start position in the backing byte-array for this key element.
+   * @throws ArrayIndexOutOfBoundsException if keyElement is not a valid element index
+   */
+  public int getRowKeyStartPosition(int keyElement) {
+    return startPositions[keyElement];
+  }
+
+}
Index: hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowKey.java
===================================================================
--- hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowKey.java (revision 0)
+++ hbase-common/src/main/java/org/apache/hadoop/hbase/util/RowKey.java (revision 0)
@@ -0,0 +1,233 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.util;
+
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * A stateful utility class for creating rowkeys for HBase tables, particularly composite keys. RowKey
+ * creates fixed length keys without the need for delimiters in between key elements (i.e., parts of the
+ * rowkey), which is a best practice in HBase. A RowKey instance is instantiated from an associated RowKeySchema,
+ * which defines the row key elements and their lengths.
+ *
+ *   int elements[] = {RowKeySchema.SIZEOF_MD5_HASH, RowKeySchema.SIZEOF_INT, RowKeySchema.SIZEOF_LONG};
+ *   RowKeySchema schema = new RowKeySchema(elements);
+ *   
+ *   RowKey rowkey = schema.createRowKey();
+ *   rowkey.setHash(0, hashVal);
+ *   rowkey.setInt(1, intVal);
+ *   rowkey.setLong(2, longVal);
+ *   
+ *   byte bytes[] = rowkey.getBytes();
+ * 
+ * The above example specifies that the key is composed of an MD5 hash followed by an int, followed by a long.
+ * The RowKey instance is used to set the values of the key elements (setHash, setInt, setLong) and then to obtain
+ * the constructed byte array to be used as the rowkey in the table.
+ *
+ * RowKey instances can be re-used by calling reset() and will conform to the RowKeySchema specified on creation. + *
+ * @see org.apache.hadoop.hbase.util.RowKeySchema
+ */
+public class RowKey {
+
+  private static final String MD5 = "MD5";
+
+  /** Schema that supplies the length and start position of every key element. */
+  private final RowKeySchema schema;
+
+  /** Backing array holding the key elements back to back. */
+  private byte[] bytes;
+
+  /** MD5 digest; created on the first setHash call and reused by later calls. */
+  private MessageDigest digest = null;
+
+  /**
+   * Creates a rowkey whose backing array has the total rowkey length defined by the schema.
+   *
+   * @param schema RowKeySchema instance that describes the format of the rowkey
+   */
+  public RowKey(RowKeySchema schema) {
+    this.schema = schema;
+    bytes = new byte[schema.getRowKeyLength()];
+  }
+
+  /**
+   * This method is used to set the entire backing byte array. Use this, for example,
+   * when a RowKey instance is used to read/process elements of a rowkey obtained from a table.
+   *
+   * The byte array must be consistent with the RowKeySchema that created this RowKey instance.
+   * The array is neither copied nor validated here: it becomes the backing array as-is, so
+   * later element reads and writes operate directly on it.
+   *
+   * @param bytes array to use as the backing array of this rowkey
+   */
+  public void setBytes(byte[] bytes) {
+    this.bytes = bytes;
+  }
+
+  /**
+   * Returns the byte stored at the start position of the specified key element.
+   *
+   * @param elementPosition zero-based index of key element
+   * @return byte
+   */
+  public byte getByte(int elementPosition) {
+    return bytes[schema.getRowKeyStartPosition(elementPosition)];
+  }
+
+  /**
+   * Returns an int at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @return int
+   */
+  public int getInt(int elementPosition) {
+    return Bytes.toInt(getBytes(elementPosition));
+  }
+
+  /**
+   * Returns a long at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @return long
+   */
+  public long getLong(int elementPosition) {
+    return Bytes.toLong(getBytes(elementPosition));
+  }
+
+  /**
+   * Returns a copy of the bytes of the specified key element.
+   *
+   * @param elementPosition zero-based index of key element
+   * @return new byte array with the length the schema defines for this element
+   */
+  public byte[] getBytes(int elementPosition) {
+    int elementLength = schema.getElementLength(elementPosition);
+    byte[] element = new byte[elementLength];
+    System.arraycopy(bytes, schema.getRowKeyStartPosition(elementPosition), element, 0, elementLength);
+    return element;
+  }
+
+  /**
+   * Adds a byte to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param b byte
+   * @throws IllegalArgumentException if the schema does not size this element at one byte
+   */
+  public void setByte(int elementPosition, byte b) {
+    setBytes(elementPosition, new byte[] {b});
+  }
+
+  /**
+   * Adds an int to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param i (int)
+   * @throws IllegalArgumentException if the encoded value does not match the element length
+   */
+  public void setInt(int elementPosition, int i) {
+    setBytes(elementPosition, Bytes.toBytes(i));
+  }
+
+  /**
+   * Adds a long to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param l (long)
+   * @throws IllegalArgumentException if the encoded value does not match the element length
+   */
+  public void setLong(int elementPosition, long l) {
+    setBytes(elementPosition, Bytes.toBytes(l));
+  }
+
+  /**
+   * Adds a byte array to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param b (bytes) whose length must equal the length the schema defines for this element
+   * @throws IllegalArgumentException if b has a different length than the key element
+   */
+  public void setBytes(int elementPosition, byte[] b) {
+    int elementLength = schema.getElementLength(elementPosition);
+    if (elementLength != b.length) {
+      // IllegalArgumentException is a RuntimeException, so callers catching the latter still work.
+      throw new IllegalArgumentException("KeyElement wrong size! keyElement:" + elementPosition
+          + " expected " + elementLength + " was " + b.length);
+    }
+    System.arraycopy(b, 0, bytes, schema.getRowKeyStartPosition(elementPosition), b.length);
+  }
+
+  /**
+   * Adds an int, which will be MD5 hashed, to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param i (int that will be hashed)
+   * @throws NoSuchAlgorithmException if no MD5 MessageDigest implementation is available
+   * @throws IllegalArgumentException if the digest length does not match the element length
+   */
+  public void setHash(int elementPosition, int i) throws NoSuchAlgorithmException {
+    setHash(elementPosition, Bytes.toBytes(i));
+  }
+
+  /**
+   * Adds a long, which will be MD5 hashed, to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param l (long that will be hashed)
+   * @throws NoSuchAlgorithmException if no MD5 MessageDigest implementation is available
+   * @throws IllegalArgumentException if the digest length does not match the element length
+   */
+  public void setHash(int elementPosition, long l) throws NoSuchAlgorithmException {
+    setHash(elementPosition, Bytes.toBytes(l));
+  }
+
+  /**
+   * Adds a String, which will be MD5 hashed, to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param s (String that will be hashed)
+   * @throws NoSuchAlgorithmException if no MD5 MessageDigest implementation is available
+   * @throws IllegalArgumentException if the digest length does not match the element length
+   */
+  public void setHash(int elementPosition, String s) throws NoSuchAlgorithmException {
+    // Encode with Bytes.toBytes (UTF-8) rather than String.getBytes(), which depends on the
+    // platform default charset and could hash the same String differently on different JVMs.
+    setHash(elementPosition, Bytes.toBytes(s));
+  }
+
+  /**
+   * Adds an array of bytes, which will be MD5 hashed, to the rowkey at the specified key element position.
+   *
+   * @param elementPosition zero-based index of key element
+   * @param b (bytes that will be hashed)
+   * @throws NoSuchAlgorithmException if no MD5 MessageDigest implementation is available
+   * @throws IllegalArgumentException if the digest length does not match the element length
+   */
+  public void setHash(int elementPosition, byte[] b) throws NoSuchAlgorithmException {
+    if (digest == null) {
+      digest = MessageDigest.getInstance(MD5);
+    }
+    setBytes(elementPosition, digest.digest(b));
+  }
+
+  /**
+   * Returns the backing array itself, not a copy.
+   *
+   * @return byte array backing the RowKey. These are the bytes that can be used as the rowkey in your HBase table.
+   */
+  public byte[] getBytes() {
+    return bytes;
+  }
+
+  /**
+   * Replaces the backing byte array with a new, zero-filled array of the rowkey length defined by
+   * the RowKeySchema this RowKey was constructed with.
+   */
+  public void reset() {
+    bytes = new byte[schema.getRowKeyLength()];
+  }
+
+}