diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKey.java
new file mode 100644
index 0000000..7b2c643
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/AbstractVarIntRowKey.java
@@ -0,0 +1,427 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Writable;
+
+/** Serializes and deserializes various integer types into a sortable,
+ * variable-length byte array.
+ *
+ *
+ * <p>Integers, signed or unsigned, are sorted in their natural order.
+ * The serialization format is designed to succinctly represent small absolute
+ * values (i.e. -2 or 4), as these values are the most frequently encountered.
+ * Our design is similar in goals to Zig-Zag variable-length integer encoding,
+ * but we also ensure that the serialized bytes sort in natural integer sort
+ * order.
+ *
+ *
+ * <h1>Serialization Format</h1>
+ * Variable-length integers omit all leading bits that are equal to the
+ * sign bit. This means we have a compact, single-byte representation for
+ * values like -1 and +1, but require more bytes to serialize values such as
+ * +2^30 and -2^30.
+ *
+ *
+ * <p>This abstract class performs serializations to/from a 64-bit long. The
+ * encoding uses a header byte followed by 0-8 data bytes. Each data byte is
+ * a byte from the serialized integer (in big endian order). The header byte
+ * format consists of implicit or explicit sign bit, a type field (or fields)
+ * indicating the length of the serialized integer in bytes, and the most
+ * significant bits of the serialized integer data.
+ *
+ *
+ * <p>Operations for setting/getting the header sign bit and type fields, as
+ * well as manipulating Writable objects, are deferred to subclasses. This
+ * design allows subclasses to choose different integer widths as well as
+ * different signedness properties. For example, unsigned integers may be
+ * implemented by treating all integers as having an implicit sign bit set to
+ * zero. Each subclass has a JavaDoc with the full description of the header
+ * format used by that particular subclass.
+ *
+ *
+ * <h1>Reserved Bits</h1>
+ * Clients may reserve the most significant bits in the header byte for their
+ * own use. If there are R reservedBits, then the most significant R bits of the
+ * header byte are reserved exclusively for the client and will be initialized
+ * to the client-specified reserved value (default 0) during serialization. The
+ * remaining 8-R bits store the header information. Subclasses specify the
+ * maximum number of reserved bits allowed, and typical maximums are 2-3 bits.
+ *
+ *
+ * <p>Reserved bits are often used to efficiently embed variable-length integers
+ * within more complex serialized data structures while preserving sort
+ * ordering. For example, the {@link BigDecimalRowKey} class uses two reserved
+ * bits to efficiently embed a variable-length integer exponent within a
+ * serialized BigDecimal object.
+ *
+ *
+ * <h1>NULL</h1>
+ * The header byte value 0x00 is reserved for NULL. Subclasses ensure that this
+ * value is used for the header byte if and only if the serialized value is
+ * NULL.
+ *
+ *
+ * <h1>Implicit Termination</h1>
+ * If {@link #termination} is false and the sort order is ascending, we can
+ * encode NULL values as a zero-length byte array. Otherwise, the header byte
+ * value 0x00 is used to serialize NULLs. Subclasses ensure this
+ * header byte is used if and only if the serialized value is NULL.
+ *
+ *
+ * <h1>Descending sort</h1>
+ * To sort in descending order we perform the same encodings as in ascending
+ * sort, except we logically invert (take the 1's complement of) each byte.
+ * However, any reserved bits in the header byte will not be inverted.
+ *
+ *
+ * <h1>Usage</h1>
+ * This is the fastest class for storing integers of any width. It performs no
+ * copies during serialization and deserialization.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public abstract class AbstractVarIntRowKey extends RowKey
+{
+ protected static final byte NULL = (byte) 0x00;
+
+ /* An extended length integer has a length >= 3 bytes. Thus we store
+ * the encoded length field with a bias of 3 so that we can pack the field
+ * into the minimum number of bits.
+ */
+ protected static final int HEADER_EXT_LENGTH_BIAS = 0x3;
+
+ /* Header type fields - set by subclass */
+ private final byte HEADER_SINGLE;
+ private final byte HEADER_DOUBLE;
+
+ /* Number of data bits in header for each header type - set by subclass */
+ private final int HEADER_SINGLE_DATA_BITS;
+ private final int HEADER_DOUBLE_DATA_BITS;
+ private final int HEADER_EXT_DATA_BITS;
+
+ /* Number of bits in the extended header length field - set by subclass */
+ private final int HEADER_EXT_LENGTH_BITS;
+
+ protected Writable lw;
+ protected int reservedBits, reservedValue;
+
+ protected AbstractVarIntRowKey(byte headerSingle, int headerSingleDataBits,
+ byte headerDouble, int headerDoubleDataBits, int headerExtLengthBits,
+ int headerExtDataBits)
+ {
+ this.HEADER_SINGLE = headerSingle;
+ this.HEADER_SINGLE_DATA_BITS = headerSingleDataBits;
+ this.HEADER_DOUBLE = headerDouble;
+ this.HEADER_DOUBLE_DATA_BITS = headerDoubleDataBits;
+ this.HEADER_EXT_LENGTH_BITS = headerExtLengthBits;
+ this.HEADER_EXT_DATA_BITS = headerExtDataBits;
+ }
+
+ /** Creates a writable object for serializing long integers. */
+ abstract Writable createWritable();
+
+ /** Stores long integer x to Writable w. */
+ abstract void setWritable(long x, Writable w);
+
+ /** Loads a long integer from Writable w. */
+ abstract long getWritable(Writable w);
+
+ /** Gets the number of reserved bits in the header byte. */
+ public int getReservedBits() { return reservedBits; }
+
+ /** Gets the maximum number of reserved bits.
+ * This is equal to the minimum number of data bits in the header,
+ * which is always the number of data bits in the extended length
+ * header type. Typical values are 2-3 bits.
+ */
+ public int getMaxReservedBits() { return HEADER_EXT_DATA_BITS; }
+
+ /** Sets the number of reserved bits in the header byte. Must not exceed
+ * the value returned by {@link #getMaxReservedBits}.
+ * @param reservedBits number of reserved header bits
+ * @throws IndexOutOfBoundsException if reservedBits > the maximum number
+ * of reserved bits
+ * @return this object
+ */
+ public AbstractVarIntRowKey setReservedBits(int reservedBits)
+ {
+ if (reservedBits > getMaxReservedBits())
+ throw new IndexOutOfBoundsException("Requested " + reservedBits +
+ " reserved bits " + "but only " + getMaxReservedBits() + " permitted");
+ this.reservedBits = reservedBits;
+ return this;
+ }
+
+ /** Sets the reserved value used in the header byte. Values are restricted
+ * to the number of bits specified in {@link #setReservedBits}. Any value
+ * outside of this range will be automatically truncated to the number of
+ * permitted reserved bits. The value itself is stored in the most
+ * significant bits of the header byte during serialization.
+ *
+ * @param reservedValue value to place in the header byte
+ * @return this object
+ */
+ public AbstractVarIntRowKey setReservedValue(int reservedValue) {
+ this.reservedValue = reservedValue & ((1 << reservedBits) - 1);
+ return this;
+ }
+
+ /** Gets the reserved header value. */
+ public int getReservedValue() { return reservedValue; }
+
+ /** Gets the sign bit of a 64-bit integer x.
+ * @return Long integer with sign bit stored in most significant bit,
+ * and all other bits clear
+ */
+ abstract long getSign(long x);
+
+ /** Reads a byte from long x. Any bytes read past the end of the long are
+ * set to the sign bit.
+ * @param byteOffset the offset of the byte to read (starting from the least
+ * significant byte)
+ * @return the byte at the given offset, or a byte with every bit equal to
+ * the sign bit if the offset is past the end of the long
+ */
+ protected byte readByte(long x, int byteOffset)
+ {
+ if (byteOffset >= Bytes.SIZEOF_LONG)
+ /* getSign() stores the sign in the most significant bit of a 64-bit
+ * long (see getSign contract and the identical shifts in
+ * getSerializedLength/deserialize), so the arithmetic shift must be by
+ * Long.SIZE - 1 -- not Integer.SIZE - 1 -- to smear the sign bit
+ * across the returned byte.
+ */
+ return (byte) (getSign(x) >> Long.SIZE - 1);
+ return (byte) (x >> byteOffset * 8);
+ }
+
+ /** Writes byte b to long x. Assumes all bits of x are initialized to the sign
+ * bit. Any bytes written past the end of the long have no effect.
+ * @param b the byte to write
+ * @param x the long value to write the byte to
+ * @param byteOffset the offset of the byte to write to (starting from the
+ * least significant byte)
+ * @return the result of writing byte b to long x
+ */
+ protected long writeByte(byte b, long x, int byteOffset)
+ {
+ if (byteOffset >= Bytes.SIZEOF_LONG)
+ return x;
+
+ /* We only encode bytes where a bit differs from the sign bit, so we OR
+ * in 1 bits from byte b if x has its sign bit clear, and mask out/clear
+ * the 0 bits from b if x has its sign bit set. The long casts are
+ * necessary for 64-bit shift offsets (see Java Language Spec. 15.19).
+ */
+ if (getSign(x) != 0)
+ return x & ~(((long)~b & 0xff) << (byteOffset * 8));
+ else
+ return x | (((long)b & 0xff) << (byteOffset * 8));
+ }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ if (o == null)
+ return terminate() ? 1 : 0;
+
+ /* Compute the number of bits we must store in our variable-length integer
+ * serialization. This is the bit position + 1 of the most significant bit
+ * that differs from the sign bit, or zero if all bits are equal to the sign.
+ * Reference: Hacker's Delight, 5.3 "Relation to the Log Function", bitsize(x)
+ */
+ long x = getWritable((Writable)o),
+ diffBits = x ^ (getSign(x) >> Long.SIZE - 1);
+ int numBits = Long.SIZE - Long.numberOfLeadingZeros(diffBits);
+
+ if (numBits <= HEADER_SINGLE_DATA_BITS - reservedBits)
+ return 1;
+ else if (numBits <= HEADER_DOUBLE_DATA_BITS - reservedBits + 8)
+ return 2;
+
+ /* Otherwise, x will require an extended (3-9) byte encoding. The number of
+ * data bytes can be computed by calculating one plus the number of
+ * bits rounded up to the nearest multiple of 8, after subtracting out the
+ * data bits that can be stored in the header.
+ */
+ return 1 + ((numBits - HEADER_EXT_DATA_BITS + reservedBits + 7) >>> 3);
+ }
+
+ /** Gets the final masked, serialized null value header byte with reserved
+ * bits set.
+ */
+ protected byte getNull() {
+ int nullHeader = mask(NULL) & (0xff >>> reservedBits);
+ return (byte) (nullHeader | (reservedValue << Byte.SIZE - reservedBits));
+ }
+
+ /** Returns true if the header is for a NULL value. Assumes header is in its
+ * final serialized form (masked, reserved bits if any are present).
+ */
+ protected boolean isNull(byte h) {
+ return (mask(h) & (0xff >>> reservedBits)) == NULL;
+ }
+
+ /** Returns a header byte initialized with the specified sign bit. No masking
+ * or reserved bit shifts should be performed - this operation should execute
+ * as if reservedBits = 0 and order is ascending.
+ * @param sign true if the header byte stores an integer with its sign bit set
+ * @return header byte initialized with the specified sign bit
+ */
+ protected abstract byte initHeader(boolean sign);
+
+ /** Returns a header byte after performing any necessary final serialization
+ * operations for non-NULL headers. This is intended to prevent non-NULL
+ * header bytes from using the header byte reserved for NULL values. The
+ * header argument has already been shifted right by reservedBits to
+ * make room for the reservedValue. No masking has been performed for sort
+ * ordering (and no masking should be performed by this method).
+ * @param h header byte
+ * @return header byte after applying all final serialization operations
+ */
+ protected byte serializeNonNullHeader(byte h) { return h; }
+
+ /** Gets the number of data bits in the header byte. */
+ protected int getNumHeaderDataBits(int length) {
+ if (length == 1)
+ return HEADER_SINGLE_DATA_BITS - reservedBits;
+ else if (length == 2)
+ return HEADER_DOUBLE_DATA_BITS - reservedBits;
+ return HEADER_EXT_DATA_BITS - reservedBits;
+ }
+
+ /** Returns the final serialized header byte for a non-NULL variable-length
+ * integer.
+ * @param sign true if the sign bit of the integer is set
+ * @param length length of the serialized integer (in bytes)
+ * @param data most significant byte of integer data to be serialized
+ * @return serialized, masked header byte
+ */
+ protected byte toHeader(boolean sign, int length, byte data) {
+ int b = initHeader(sign),
+ negSign = sign ? 0 : -1;
+
+ if (length == 1) {
+ b |= (~negSign & HEADER_SINGLE);
+ } else if (length == 2) {
+ b |= (negSign & HEADER_SINGLE) | (~negSign & HEADER_DOUBLE);
+ } else {
+ int encodedLength = (length - HEADER_EXT_LENGTH_BIAS) ^ ~negSign;
+ encodedLength &= (1 << HEADER_EXT_LENGTH_BITS) - 1;
+ encodedLength <<= HEADER_EXT_DATA_BITS;
+ b |= (negSign & (HEADER_SINGLE|HEADER_DOUBLE)) | encodedLength;
+ }
+
+ data &= (1 << getNumHeaderDataBits(length)) - 1;
+ b = serializeNonNullHeader((byte) ((b >>> reservedBits) | data));
+ b = mask((byte)b) & (0xff >>> reservedBits);
+ return (byte) (b | (reservedValue << Byte.SIZE - reservedBits));
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w)
+ throws IOException
+ {
+ byte[] b = w.get();
+ int offset = w.getOffset();
+
+ if (o == null) {
+ if (terminate()) {
+ b[offset] = getNull();
+ RowKeyUtils.seek(w, 1);
+ }
+ return;
+ }
+
+ long x = getWritable((Writable)o);
+ int length = getSerializedLength((Writable)o);
+
+ b[offset] = toHeader(getSign(x) != 0, length, readByte(x, length - 1));
+ for (int i = 1; i < length; i++)
+ b[offset + i] = mask(readByte(x, length - i - 1));
+ RowKeyUtils.seek(w, length);
+ }
+
+ /** Gets the sign of a header byte. The returned value will have the
+ * sign stored its most significant bit, and all other bits clear. Assumes the
+ * header byte has its mask and reserved bits, if any, removed (equivalent to
+ * a header byte serialized with reservedBits = 0 and order ascending).
+ */
+ protected abstract byte getSign(byte h);
+
+ /** Performs any initial deserialization operations on a non-NULL header byte.
+ * This is intended to undo any work done by {@link #serializeNonNullHeader}.
+ * The header byte is assumed to have had its mask, if any, removed (equivalent
+ * to a header byte serialized in ascending order). However, the header byte
+ * does still contains its reserved bits.
+ */
+ protected byte deserializeNonNullHeader(byte h) { return h; }
+
+ /** Gets the length in bytes of a variable-length integer from its header.
+ * Assumes the header byte has its mask and reserved bits, if any,
+ * removed (equivalent to a header byte serialized with reservedBits = 0
+ * and order ascending).
+ */
+ protected int getVarIntLength(byte h) {
+ int negSign = ~getSign(h) >> Integer.SIZE - 1;
+
+ if (((h ^ negSign) & HEADER_SINGLE) != 0) {
+ return 1;
+ } else if (((h ^ negSign) & HEADER_DOUBLE) != 0) {
+ return 2;
+ } else {
+ int length = ((h ^ ~negSign) >>> HEADER_EXT_DATA_BITS);
+ length &= (1 << HEADER_EXT_LENGTH_BITS) - 1;
+ return length + HEADER_EXT_LENGTH_BIAS;
+ }
+ }
+
+ @Override
+ public void skip(ImmutableBytesWritable w) throws IOException {
+ byte[] b = w.get();
+ int offset = w.getOffset();
+ if (w.getLength() <= 0)
+ return;
+
+ if (isNull(b[offset])) {
+ RowKeyUtils.seek(w, 1);
+ } else {
+ byte h = (byte) (deserializeNonNullHeader(mask(b[offset])));
+ RowKeyUtils.seek(w, getVarIntLength((byte) (h << reservedBits)));
+ }
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ byte[] b = w.get();
+ int offset = w.getOffset();
+ if (w.getLength() <= 0)
+ return null;
+
+ if (isNull(b[offset])) {
+ RowKeyUtils.seek(w, 1);
+ return null;
+ }
+
+ byte h = (byte) (deserializeNonNullHeader(mask(b[offset])) << reservedBits);
+ int length = getVarIntLength(h);
+
+ long x = (long)getSign(h) >> Long.SIZE - 1;
+ byte d = (byte) (x << getNumHeaderDataBits(length));
+ d |= (byte)((h >>> reservedBits) & ((1 << getNumHeaderDataBits(length))-1));
+
+ x = writeByte(d, x, length - 1);
+ for (int i = 1; i < length; i++)
+ x = writeByte(mask(b[offset + i]), x, length - i - 1);
+ RowKeyUtils.seek(w, length);
+
+ if (lw == null)
+ lw = createWritable();
+ setWritable(x, lw);
+ return lw;
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalRowKey.java
new file mode 100644
index 0000000..c514e06
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/BigDecimalRowKey.java
@@ -0,0 +1,413 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+
+/** Serializes and deserializes BigDecimal objects into a sortable
+ * byte array representation.
+ *
+ *
+ * <p>This format ensures that serialized byte values sort in the natural
+ * order of a {@link BigDecimal} (as ordered by {@link BigDecimal#compareTo}).
+ * NULL values compare less than any non-NULL value.
+ *
+ *
+ * <h1>Serialization Overview</h1>
+ * A BigDecimal object is composed of a power of 10 exponent scale
+ * and an unscaled, arbitrary precision integer significand. The value of the
+ * BigDecimal is unscaled base 2 significand &times; 10<sup>-scale</sup>. The
+ * significand is an arbitrary precision {@link BigInteger}, while
+ * the scale is a signed 32-bit int.
+ *
+ *
+ * <p>This encoding format converts a canonicalized BigDecimal into a
+ * power-of-10 adjusted exponent, and an unscaled arbitrary precision base-10
+ * integer significand. As described in {@link BigDecimal#toString}, an
+ * adjusted exponent is equal to precision - scale - 1, where
+ * precision is the number of digits in the unscaled base 10 significand
+ * (with trailing zeroes removed).
+ *
+ *
+ * <p>To serialize the BigDecimal, we first serialize the adjusted exponent
+ * combined with a few leading header bits using a subclass of
+ * {@link IntWritableRowKey} (header bits are packed into the adjusted exponent
+ * serialization format using {@link IntWritableRowKey#setReservedValue}). Then
+ * we serialize the base 10 significand using a BCD encoding format described
+ * below. The resulting byte array sorts in natural BigDecimal
+ * sort order.
+ *
+ *
+ * <h1>Canonicalization</h1>
+ * All BigDecimal values first go through canonicalization by stripping any
+ * trailing zeros using the {@link BigDecimal#stripTrailingZeros} method.
+ * This avoids having multiple numerically equivalent byte representations, and
+ * also ensures that no space is wasted storing redundant trailing zeros.
+ *
+ *
+ * <h1>Base Normalization</h1>
+ * Next we convert the arbitrary precision BigInteger significand
+ * to base 10, so that the scale and significand have a common base and the
+ * adjusted exponent can be calculated. We cannot use two as a common base
+ * because some powers of 10 (such as 10<sup>-1</sup>) have infinite
+ * binary representations. We perform the base 10 conversion on the significand
+ * using {@link BigInteger#toString}. We remove the leading '-' if the
+ * significand value is negative, and encode the resulting decimal String into
+ * bytes using the Binary Coded Decimal format described below. We ignore the
+ * significand sign bit when computing the BCD serialization because
+ * the significant sign bit is encoded into the header byte, as described in the
+ * Header section.
+ *
+ *
+ * <h1>Zero Nibble Terminated Binary Coded Decimals</h1>
+ * We convert decimal Strings into Binary Coded Decimal by mapping
+ * the ASCII characters '0' … '9' to integers 1 … 10. Each ASCII
+ * digit is encoded into a 4-bit nibble. There are two nibbles per byte, stored
+ * in big-endian order. A nibble of 0 is used as terminator to indicate the end
+ * of the BCD encoded string. This ensures that shorter strings that are the
+ * prefix of a longer string will always compare less than the longer string, as
+ * the terminator is a smaller value than any decimal digit.
+ *
+ *
+ * <p>The BCD encoding requires an extra byte of space for the terminator
+ * only if there are an even number of characters (and implicit termination is
+ * not allowed). An odd-length string does not use the least significant nibble
+ * of its last byte, and thus can store a zero terminator nibble without
+ * requiring any additional bytes.
+ *
+ *
+ * <h1>Exponent</h1>
+ * The adjusted exponent is defined as precision - scale - 1, where
+ * precision is equal to the number of the digits in the base 10 unscaled
+ * significand. We translate the adjusted exponent into a variable-length byte
+ * array using a subclass of {@link IntWritableRowKey}, with two reserved bits
+ * used to encode header information.
+ *
+ *
+ * <p>The adjusted exponent is the sum of two 32-bit integers minus one, which
+ * requires 33 bits of storage in the worst case. Given two reserved bits, the
+ * IntWritable row key format can serialize integers with up to 33 data
+ * bits, not including the sign bit. However, this format truncates all values
+ * in memory to fit into a 32-bit integer.
+ *
+ *
+ * <p>To use the more efficient serialization format employed by
+ * IntWritable while avoiding 32-bit truncation, This class
+ * subclasses IntWritableRowKey to use LongWritable
+ * rather than IntWritable objects for storing values in memory.
+ * The byte serialization format remains unchanged (and is slightly more
+ * efficient for 33-bit objects than the format employed by
+ * {@link LongWritableRowKey}).
+ *
+ *
+ * <h1>Header</h1>
+ * The header encodes the type of the BigDecimal: null, negative, zero, or
+ * positive. These types are assigned to integer values 0-3, respectively.
+ * We use two reserved header bits for the header value, and serialize as part
+ * of the adjusted exponent using
+ * IntWritableRowKey.setReservedValue. If the BigDecimal is NULL
+ * or zero, the associated adjusted exponent is also NULL (as there is no finite
+ * power of 10 that can produce a value of NULL or zero) and there is no
+ * significand. For positive or negative BigDecimals, the adjusted exponent and
+ * significand are always present, and the significand is serialized after the
+ * adjusted exponent.
+ *
+ *
+ * <p>If the header is negative, then all other serialized bits except for the
+ * two-bit header are logically inverted. This is to preserve sort order, as
+ * negative numbers with larger exponents or significands should compare less
+ * than negative numbers with smaller exponents or significands.
+ *
+ *
+ * <h1>Descending sort</h1>
+ * To sort in descending order we perform the same encodings as in ascending
+ * sort, except we logically invert (take the 1's complement of) all
+ * serialized bytes, including the header bits in the adjusted exponent. We
+ * perform this negation on all serialized bytes even if we have already
+ * negated bytes once due to a negative header value.
+ *
+ *
+ * <h1>Implicit Termination</h1>
+ * If {@link #termination} is false and the sort order is ascending, we
+ * encode NULL values as a zero-length byte array instead of the format
+ * specified above. We also omit the trailing terminator byte in our BCD
+ * representation (which is only required for even-length BCD serializations
+ * anyway). Implicit termination is discussed further in
+ * {@link RowKey}.
+ *
+ *
+ * <h1>Usage</h1>
+ * This is the second fastest class for storing BigDecimal objects.
+ * Two copies are performed during serialization and three for deserialization.
+ * Unfortunately, as BigDecimal objects are immutable, they cannot
+ * be re-used during deserialization. Each deserialization must allocate a new
+ * BigDecimal. There is currently no available mutable
+ * BigDecimal equivalent.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class BigDecimalRowKey extends RowKey
+{
+ /* Header types */
+ protected static final byte HEADER_NULL = 0x00;
+ protected static final byte HEADER_NEGATIVE = 0x01;
+ protected static final byte HEADER_ZERO = 0x02;
+ protected static final byte HEADER_POSITIVE = 0x03;
+
+ /* Number of header bits */
+ protected static final int HEADER_BITS = 0x2;
+
+ protected LongWritable lw;
+ protected ExponentRowKey expKey;
+ protected byte signMask;
+
+ public BigDecimalRowKey() {
+ expKey = new ExponentRowKey();
+ expKey.setReservedBits(HEADER_BITS).setTermination(Termination.MUST);
+ }
+
+ @Override
+ public RowKey setOrder(Order order) {
+ expKey.setOrder(order);
+ return super.setOrder(order);
+ }
+
+ protected byte getSignMask() { return signMask; }
+
+ protected void resetSignMask() { setSignMask((byte)0); }
+
+ protected void setSignMask(byte signMask) { this.signMask = signMask; }
+
+ @Override
+ protected byte mask(byte b) {
+ return (byte) (b ^ order.mask() ^ signMask);
+ }
+
+ @Override
+ public Class> getSerializedClass() { return BigDecimal.class; }
+
+ /** Gets the length of a String if serialized in our BCD format. We require
+ * 1 byte for every 2 characters, rounding up. Furthermore, if the number
+ * of characters is even, we require an additional byte for the
+ * terminator nibble if terminate() is true.
+ */
+ protected int getSerializedLength(String s) {
+ return (s.length() + (terminate() ? 2 : 1)) >>> 1;
+ }
+
+ /** Serializes a decimal String s into packed, zero nibble-terminated BCD
+ * format. After this operation completes, the position (length) of the byte
+ * buffer is incremented (decremented) by the number of bytes written.
+ * @param s unsigned decimal string to convert to BCD
+ * @param w byte buffer to store the BCD bytes
+ */
+ protected void serializeBCD(String s, ImmutableBytesWritable w) {
+ byte[] b = w.get();
+ int offset = w.getOffset(),
+ strLength = s.length(),
+ bcdLength = getSerializedLength(s);
+
+ for (int i = 0; i < bcdLength; i++) {
+ byte bcd = 0; /* initialize both nibbles to zero terminator */
+ int strPos = 2 * i;
+ if (strPos < strLength)
+ bcd = (byte) (1 + Character.digit(s.charAt(strPos), 10) << 4);
+ if (++strPos < strLength)
+ bcd |= (byte) (1 + Character.digit(s.charAt(strPos), 10));
+ b[offset + i] = mask(bcd);
+ }
+
+ RowKeyUtils.seek(w, bcdLength);
+ }
+
+ /** Converts an arbitrary precision integer to an unsigned decimal string. */
+ protected String getDecimalString(BigInteger i) {
+ String s = i.toString();
+ return i.signum() >= 0 ? s : s.substring(1); /* skip leading '-' */
+ }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ if (o == null)
+ return terminate() ? expKey.getSerializedLength(null) : 0;
+
+ BigDecimal d = ((BigDecimal)o).stripTrailingZeros();
+ BigInteger i = d.unscaledValue();
+ if (i.signum() == 0)
+ return expKey.getSerializedLength(null);
+
+ String s = getDecimalString(i);
+ if (lw == null)
+ lw = new LongWritable();
+ lw.set((long)s.length() + -d.scale() -1L);
+
+ return expKey.getSerializedLength(lw) + getSerializedLength(s);
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w)
+ throws IOException
+ {
+ resetSignMask();
+
+ if (o == null) {
+ if (terminate()) {
+ expKey.setReservedValue(mask(HEADER_NULL));
+ expKey.serialize(null, w);
+ }
+ return;
+ }
+
+ BigDecimal d = ((BigDecimal)o).stripTrailingZeros();
+ BigInteger i = d.unscaledValue();
+ if (i.signum() == 0) {
+ expKey.setReservedValue(mask(HEADER_ZERO));
+ expKey.serialize(null, w);
+ return;
+ }
+
+ byte header = i.signum() < 0 ? HEADER_NEGATIVE : HEADER_POSITIVE;
+ expKey.setReservedValue(mask(header));
+
+ String s = getDecimalString(i);
+ /* Adjusted exponent = precision + scale - 1 */
+ long precision = s.length(),
+ exp = precision + -d.scale() -1L;
+ if (lw == null)
+ lw = new LongWritable();
+ lw.set(exp);
+
+ setSignMask((byte) (i.signum() >> Integer.SIZE - 1));
+ expKey.serialize(lw, w);
+ serializeBCD(s, w);
+ }
+
+ /** Decodes a Binary Coded Decimal digit and adds it to a string. Returns
+ * true (and leaves string unmodified) if digit is the terminator byte.
+ * Returns false otherwise.
+ */
+ protected boolean addDigit(byte bcd, StringBuilder sb) {
+ if (bcd == 0)
+ return true;
+ sb.append((char) ('0' + bcd - 1));
+ return false;
+ }
+
+ /** Converts a packed, zero nibble-terminated BCD byte array into an unsigned
+ * decimal String.
+ */
+ protected String deserializeBCD(ImmutableBytesWritable w) {
+ byte[] b = w.get();
+ int offset = w.getOffset(),
+ len = w.getLength(),
+ i = 0;
+
+ StringBuilder sb = new StringBuilder();
+ while(i < len) {
+ byte c = mask(b[offset + i++]);
+ if (addDigit((byte) ((c >>> 4) & 0xf), sb)
+ || addDigit((byte) (c & 0xf), sb))
+ break;
+ }
+
+ RowKeyUtils.seek(w, i);
+ return sb.toString();
+ }
+
+ protected int getBCDEncodedLength(ImmutableBytesWritable w) {
+ byte[] b = w.get();
+ int offset = w.getOffset(),
+ len = w.getLength(),
+ i = 0;
+
+ while (i < len) {
+ byte c = mask(b[offset + i++]);
+ if (((c & 0xf0) == 0) || ((c & 0x0f) == 0))
+ break;
+ }
+
+ return i;
+ }
+
+ /** Deserializes BigDecimal header from exponent byte. This method will set
+ * sign mask to -1 if header is {@link #HEADER_NEGATIVE}, 0 otherwise.
+ * @param b most significant byte of exponent (header byte)
+ * @return the BigDecimal header stored in byte b
+ */
+ protected byte deserializeHeader(byte b) {
+ resetSignMask();
+ byte h = (byte) ((mask(b) & 0xff) >>> Byte.SIZE - HEADER_BITS);
+ setSignMask((byte) (h == HEADER_NEGATIVE ? -1 : 0));
+ return h;
+ }
+
+ @Override
+ public void skip(ImmutableBytesWritable w) throws IOException {
+ if (w.getLength() <= 0)
+ return;
+
+ byte b = w.get()[w.getOffset()];
+ deserializeHeader(b);
+ expKey.skip(w);
+ if (expKey.isNull(b))
+ return;
+ RowKeyUtils.seek(w, getBCDEncodedLength(w));
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ byte[] b = w.get();
+ int offset = w.getOffset();
+
+ if (w.getLength() <= 0)
+ return null;
+
+ byte h = deserializeHeader(b[offset]);
+ LongWritable o = (LongWritable) expKey.deserialize(w);
+ if (o == null)
+ return h == HEADER_NULL ? null : BigDecimal.ZERO;
+
+ long exp = o.get();
+ String s = deserializeBCD(w);
+
+ int precision = s.length(),
+ scale = (int) (exp - precision + 1L);
+
+ BigInteger i = new BigInteger(h == HEADER_POSITIVE ? s : '-' + s);
+ return new BigDecimal(i, -scale);
+ }
+
+ protected class ExponentRowKey extends IntWritableRowKey {
+ /* The maximum value that can be stored by IntWritableRowKey's serialization
+ * format (excluding the sign bit) is a 35-bit value, which is enough to
+ * store the 33-bit adjusted exponent + two reserved bits. We override the
+ * Writable methods so that a long is used to store the serialization result
+ * in memory, while continuing to use IntWritableRowKey's serialization
+ * format for byte serialization/deserialization.
+ */
+ @Override
+ public Class> getSerializedClass() { return LongWritable.class; }
+
+ @Override
+ Writable createWritable() { return new LongWritable(); }
+
+ @Override
+ void setWritable(long x, Writable w) { ((LongWritable)w).set(x); }
+
+ @Override
+ long getWritable(Writable w) { return ((LongWritable)w).get(); }
+
+ @Override
+ protected byte mask(byte b) {
+ return (byte) (b ^ order.mask() ^ getSignMask());
+ }
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleRowKey.java
new file mode 100644
index 0000000..755b85f
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleRowKey.java
@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+
+/** Serializes and deserializes Doubles into a sortable byte array
+ * representation.
+ *
+ *
+ * <p>The serialization and deserialization method are identical to
+ * {@link DoubleWritableRowKey} after converting the DoubleWritable to/from a
+ * Double.
+ *
+ *
Usage
+ * This is the slower class for storing doubles. No copies are made when
+ * serializing and deserializing, but unfortunately Double objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive doubles are first passed to
+ * {@link Double#valueOf}, so boxed Double values may be shared if the
+ * valueOf method has frequent cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class DoubleRowKey extends DoubleWritableRowKey
+{
+  /** Scratch writable re-used across serializations to avoid garbage. */
+  private DoubleWritable dw;
+
+  @Override
+  public Class<?> getSerializedClass() { return Double.class; }
+
+  /** Wraps a boxed Double in the re-used scratch DoubleWritable; null and
+   * DoubleWritable arguments pass through unchanged. */
+  protected Object toDoubleWritable(Object o) {
+    if (o == null || o instanceof DoubleWritable)
+      return o;
+    if (dw == null)
+      dw = new DoubleWritable();
+    dw.set((Double)o);
+    return dw;
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return super.getSerializedLength(toDoubleWritable(o));
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+    super.serialize(toDoubleWritable(o), w);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    // Named 'result' so it does not shadow the scratch field above.
+    DoubleWritable result = (DoubleWritable) super.deserialize(w);
+    if (result == null)
+      return null;
+
+    return Double.valueOf(result.get());
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleWritableRowKey.java
new file mode 100644
index 0000000..fa9627d
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/DoubleWritableRowKey.java
@@ -0,0 +1,171 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.DoubleWritable;
+
+/** Serializes and deserializes DoubleWritables into a sortable byte array
+ * representation.
+ *
+ *
This format ensures the following total ordering of floating point values:
+ * NULL < Double.NEGATIVE_INFINITY < -Double.MAX_VALUE < ...
+ * < -Double.MIN_VALUE < -0.0 < +0.0; < Double.MIN_VALUE < ...
+ * < Double.MAX_VALUE < Double.POSITIVE_INFINITY < Double.NaN
+ *
+ *
Serialization Format
+ *
Floating point numbers are encoded as specified in IEEE 754. A 64-bit double
+ * precision float consists of a sign bit, 11-bit unsigned exponent encoded
+ * in offset-1023 notation, and a 52-bit significand. The format is described
+ * further in the
+ * Double Precision Floating Point Wikipedia page
+ *
+ *
The value of a normal float is
+ * -1 sign bit × 2exponent - 1023
+ * × 1.significand
+ * <p>Thus, we need only ensure that negative numbers sort in the exact
+ * opposite order as positive numbers (so that say, negative infinity is less
+ * than negative 1), and that all negative numbers compare less than any
+ * positive number. To accomplish this, we invert the sign bit of all floating
+ * point numbers, and we also invert the exponent and significand bits if the
+ * floating point number was negative.
+ *
+ *
More specifically, we first store the floating point bits into a 64-bit
+ * long l using {@link Double#doubleToLongBits}. This method
+ * collapses all NaNs into a single, canonical NaN value but otherwise leaves
+ * the bits unchanged. We then compute
+ * <pre>l = (l ^ ((l &gt;&gt; Long.SIZE - 1) | Long.MIN_VALUE)) + 1</pre>
+ * which inverts the sign bit and XOR's all other bits with the sign bit
+ * itself. Comparing the raw bytes of l in most significant byte
+ * order is equivalent to performing a double precision floating point
+ * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't
+ * compare equal to anything when performing floating point comparisons).
+ *
+ *
Finally, we must encode NULL efficiently. Fortunately, l can
+ * never have all of its bits set to one (equivalent to -1 signed in two's
+ * complement) as this value corresponds to a NaN removed during NaN
+ * canonicalization. Thus, we can encode NULL as a long zero, and all non-NULL
+ * numbers are translated to a long as specified above and then incremented by
+ * 1, which is guaranteed not to cause unsigned overflow as l must
+ * have at least one bit set to zero.
+ *
+ *
The resulting long integer is then converted into a byte array by
+ * serializing the long one byte at a time in most significant byte order.
+ * All serialized values are 8 bytes in length
+ *
+ *
Descending sort
+ * To sort in descending order we perform the same encodings as in ascending
+ * sort, except we logically invert (take the 1's complement of) each byte.
+ *
+ *
Implicit Termination
+ * If {@link #termination} is false and the sort order is ascending, we can
+ * encode NULL values as a zero-length byte array instead of using the 8 byte
+ * encoding specified above. Implicit termination is discussed further in
+ * {@link RowKey}.
+ *
+ *
Usage
+ * This is the fastest class for storing doubles. It performs no object copies
+ * during serialization and deserialization.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class DoubleWritableRowKey extends RowKey
+{
+  /** Post-transform encoding of NULL; no transformed double maps to 0,
+   * because every non-NULL value is incremented by 1 after the XOR step. */
+  private static final long NULL = 0;
+
+  /** Scratch writable re-used across deserializations to avoid garbage. */
+  private DoubleWritable dw;
+
+  @Override
+  public Class<?> getSerializedClass() { return DoubleWritable.class; }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    // NULL may be encoded as zero bytes when implicit termination is allowed.
+    if (o == null && !terminate())
+      return 0;
+    return Bytes.SIZEOF_LONG;
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w)
+    throws IOException
+  {
+    byte[] b = w.get();
+    int offset = w.getOffset();
+    long l;
+
+    if (o == null) {
+      if (!terminate())
+        return;
+      l = NULL;
+    } else {
+      l = Double.doubleToLongBits(((DoubleWritable)o).get());
+      // Invert the sign bit; the sign-extended shift additionally inverts
+      // exponent/significand bits for negatives so they sort reversed.
+      // The +1 reserves the value 0 for NULL (cannot overflow: all-ones is
+      // a NaN pattern removed by doubleToLongBits canonicalization).
+      l = (l ^ ((l >> Long.SIZE - 1) | Long.MIN_VALUE)) + 1;
+    }
+
+    // Big-endian write; order.mask() inverts every byte for descending sort.
+    Bytes.putLong(b, offset, l ^ order.mask());
+    RowKeyUtils.seek(w, Bytes.SIZEOF_LONG);
+  }
+
+  @Override
+  public void skip(ImmutableBytesWritable w) throws IOException {
+    if (w.getLength() <= 0)
+      return;
+    RowKeyUtils.seek(w, Bytes.SIZEOF_LONG);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    byte[] s = w.get();
+    int offset = w.getOffset();
+    if (w.getLength() <= 0)
+      return null;
+
+    try {
+      long l = Bytes.toLong(s, offset) ^ order.mask();
+      if (l == NULL)
+        return null;
+
+      if (dw == null)
+        dw = new DoubleWritable();
+
+      // Invert the serialize transform: undo the +1, then undo the XOR
+      // (~l recovers the original sign bit after decrementing).
+      l--;
+      l ^= (~l >> Long.SIZE - 1) | Long.MIN_VALUE;
+      dw.set(Double.longBitsToDouble(l));
+      return dw;
+    } finally {
+      // Always advance past the fixed 8-byte encoding.
+      RowKeyUtils.seek(w, Bytes.SIZEOF_LONG);
+    }
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedByteArrayRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedByteArrayRowKey.java
new file mode 100644
index 0000000..56171de
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedByteArrayRowKey.java
@@ -0,0 +1,82 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.BytesWritable;
+
+/**
+ * Serialize and deserialize byte arrays into a fixed-length byte array.
+ *
+ * The serialization and deserialization methods are identical to
+ * {@link FixedBytesWritableRowKey} after converting the BytesWritable
+ * to/from a byte[].
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedByteArrayRowKey extends FixedBytesWritableRowKey {
+
+  public FixedByteArrayRowKey(int length) {
+    super(length);
+  }
+
+  @Override
+  public Class<?> getSerializedClass() {
+    return byte[].class;
+  }
+
+  /** Wraps a byte[] in a fresh BytesWritable; null and BytesWritable
+   * arguments pass through unchanged. */
+  protected Object toBytesWritable(Object o) {
+    if (o == null || o instanceof BytesWritable)
+      return o;
+    final BytesWritable bw = new BytesWritable();
+    final byte[] bytes = (byte[]) o;
+    bw.set(bytes, 0, bytes.length);
+    return bw;
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return super.getSerializedLength(toBytesWritable(o));
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+    super.serialize(toBytesWritable(o), w);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    BytesWritable bw = (BytesWritable) super.deserialize(w);
+    if (bw == null) {
+      return null;
+    }
+    // Copy out exactly getLength() bytes; BytesWritable's backing array
+    // may be larger than its logical length.
+    final byte[] result = new byte[bw.getLength()];
+    System.arraycopy(bw.getBytes(), 0, result, 0, bw.getLength());
+    return result;
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedBytesWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedBytesWritableRowKey.java
new file mode 100644
index 0000000..94e6c23
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedBytesWritableRowKey.java
@@ -0,0 +1,106 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.BytesWritable;
+
+/**
+ * Serializes and deserializes BytesWritable into a fixed length sortable
+ * representation.
+ *
+ * TODO: this doesn't support NULL values (because they can not be distinguished
+ * from empty arrays). Should I explicitly check for this?
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedBytesWritableRowKey extends RowKey {
+  /** Exact number of bytes every serialized value occupies. */
+  private final int length;
+
+  public FixedBytesWritableRowKey(int length) {
+    this.length = length;
+  }
+
+  @Override
+  public Class<?> getSerializedClass() {
+    return BytesWritable.class;
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return length;
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w)
+      throws IOException {
+    final BytesWritable bytesWritableToWrite = (BytesWritable) o;
+    final int srcLen = bytesWritableToWrite.getLength();
+    final byte[] bytesToWrite = bytesWritableToWrite.getBytes();
+
+    // Validate before touching the destination buffer.
+    if (srcLen != length)
+      throw new IllegalArgumentException(
+          "can only serialize byte arrays of length " + length + ", not " + srcLen);
+
+    // apply the sort order mask
+    final byte[] maskedBytesToWrite = maskAll(bytesToWrite, order, 0, srcLen);
+
+    Bytes.putBytes(w.get(), w.getOffset(), maskedBytesToWrite, 0, srcLen);
+    RowKeyUtils.seek(w, srcLen);
+  }
+
+  /** Returns bytes XOR'd with the sort-order mask over
+   * [offset, offset + length). When the mask is zero the input array itself
+   * is returned (no copy); otherwise only the masked region of the returned
+   * copy is populated. */
+  private byte[] maskAll(byte[] bytes, Order order, int offset, int length) {
+    if (order.mask() == 0) {
+      return bytes; // xor with zeroes has no effect anyways
+    } else {
+      final byte[] masked = new byte[bytes.length];
+      for (int i = offset; i < length + offset; i++) {
+        masked[i] = (byte) (bytes[i] ^ order.mask());
+      }
+      return masked;
+    }
+  }
+
+  @Override
+  public void skip(ImmutableBytesWritable w) throws IOException {
+    RowKeyUtils.seek(w, length);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    int offset = w.getOffset();
+    byte[] serialized = w.get();
+
+    // XOR is its own inverse, so unmasking re-uses maskAll.
+    final byte[] unmasked = maskAll(serialized, order, offset, length);
+
+    RowKeyUtils.seek(w, length);
+
+    // BytesWritable.set copies, so sharing the (possibly original) array
+    // with the caller's buffer is safe here.
+    final BytesWritable result = new BytesWritable();
+    result.set(unmasked, offset, length);
+    return result;
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntWritableRowKey.java
new file mode 100644
index 0000000..a049ce9
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntWritableRowKey.java
@@ -0,0 +1,98 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.IntWritable;
+
+/** Serializes and deserializes IntWritablesWritables into a sortable
+ * fixed-length byte array representation.
+ *
+ *
This format ensures that all integers sort in their natural order, as
+ * they would sort when using signed integer comparison.
+ *
+ *
Serialization Format
+ * All Integers are serialized to a 4-byte, fixed-width sortable byte format.
+ * Serialization is performed by inverting the integer sign bit and writing the
+ * resulting bytes to the byte array in big endian order.
+ *
+ *
NULL
+ * Like all fixed-width integer types, this class does NOT support null
+ * value types. If you need null support use {@link IntWritableRowKey}.
+ *
+ *
Descending sort
+ * To sort in descending order we perform the same encodings as in ascending
+ * sort, except we logically invert (take the 1's complement of) each byte.
+ *
+ *
Usage
+ * This is the fastest class for storing fixed width 32-bit ints. Use
+ * {@link IntWritableRowKey} for a more compact, variable-length representation
+ * in almost all cases. This format is only more compact if integers most
+ * frequently require 28 or more bits to store (including the sign bit).
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedIntWritableRowKey extends RowKey
+{
+  /** Scratch writable re-used across deserializations to avoid garbage. */
+  private IntWritable iw;
+
+  @Override
+  public Class<?> getSerializedClass() { return IntWritable.class; }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return Bytes.SIZEOF_INT;
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w)
+    throws IOException
+  {
+    byte[] b = w.get();
+    int offset = w.getOffset();
+
+    // Flip the sign bit so signed ints sort naturally under unsigned byte
+    // comparison; order.mask() inverts all bits for descending sort.
+    int i = ((IntWritable)o).get();
+    Bytes.putInt(b, offset, i ^ Integer.MIN_VALUE ^ order.mask());
+    RowKeyUtils.seek(w, Bytes.SIZEOF_INT);
+  }
+
+  @Override
+  public void skip(ImmutableBytesWritable w) throws IOException {
+    RowKeyUtils.seek(w, Bytes.SIZEOF_INT);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    int offset = w.getOffset();
+    byte[] s = w.get();
+
+    // XOR is self-inverse: undo both the sign flip and the order mask.
+    int i = Bytes.toInt(s, offset) ^ Integer.MIN_VALUE ^ order.mask();
+    RowKeyUtils.seek(w, Bytes.SIZEOF_INT);
+
+    if (iw == null)
+      iw = new IntWritable();
+    iw.set(i);
+    return iw;
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntegerRowKey.java
new file mode 100644
index 0000000..998587b
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedIntegerRowKey.java
@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/** Serialize and deserialize Integer Objects into a fixed-length sortable
+ * byte array representation.
+ *
+ *
The serialization and deserialization methods are
+ * identical to {@link FixedIntWritableRowKey} after converting the IntWritable
+ * to/from an Integer.
+ *
+ *
Usage
+ * This is the slower class for storing ints. No copies are made when
+ * serializing and deserializing. Unfortunately Integer objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive ints are first passed to
+ * {@link Integer#valueOf}, so boxed Integer values may be shared if the
+ * valueOf method has frequent cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedIntegerRowKey extends FixedIntWritableRowKey
+{
+  /** Scratch writable re-used across serializations to avoid garbage. */
+  private IntWritable iw;
+
+  @Override
+  public Class<?> getSerializedClass() { return Integer.class; }
+
+  /** Wraps a boxed Integer in the re-used scratch IntWritable; null and
+   * IntWritable arguments pass through unchanged. */
+  protected Object toIntWritable(Object o) {
+    if (o == null || o instanceof IntWritable)
+      return o;
+    if (iw == null)
+      iw = new IntWritable();
+    iw.set((Integer)o);
+    return iw;
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return super.getSerializedLength(toIntWritable(o));
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+    super.serialize(toIntWritable(o), w);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    // Named 'result' so it does not shadow the scratch field above.
+    IntWritable result = (IntWritable) super.deserialize(w);
+    if (result == null)
+      return null;
+
+    return Integer.valueOf(result.get());
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongRowKey.java
new file mode 100644
index 0000000..8992c3a
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongRowKey.java
@@ -0,0 +1,80 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/** Serialize and deserialize Long Objects into a fixed-length sortable
+ * byte array representation.
+ *
+ *
The serialization and deserialization methods are identical to
+ * {@link FixedLongWritableRowKey} after converting the LongWritable to/from a
+ * Long.
+ *
+ *
Usage
+ * This is the slower class for storing longs. One copy is made when
+ * serializing and deserializing. Unfortunately Long objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive longs are first passed to
+ * {@link Long#valueOf}, so boxed Long values may be shared, reducing the
+ * copies on deserialization, if the valueOf method has frequent
+ * cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedLongRowKey extends FixedLongWritableRowKey
+{
+  /** Scratch writable re-used across serializations to avoid garbage. */
+  private LongWritable lw;
+
+  @Override
+  public Class<?> getSerializedClass() { return Long.class; }
+
+  /** Wraps a boxed Long in the re-used scratch LongWritable; null and
+   * LongWritable arguments pass through unchanged. */
+  protected Object toLongWritable(Object o) {
+    if (o == null || o instanceof LongWritable)
+      return o;
+    if (lw == null)
+      lw = new LongWritable();
+    lw.set((Long)o);
+    return lw;
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return super.getSerializedLength(toLongWritable(o));
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+    super.serialize(toLongWritable(o), w);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    // Named 'result' so it does not shadow the scratch field above.
+    LongWritable result = (LongWritable) super.deserialize(w);
+    if (result == null)
+      return null;
+
+    return Long.valueOf(result.get());
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongWritableRowKey.java
new file mode 100644
index 0000000..b0bbee3
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedLongWritableRowKey.java
@@ -0,0 +1,98 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.LongWritable;
+
+/** Serializes and deserializes LongWritables into a sortable
+ * fixed-length byte array representation.
+ *
+ *
This format ensures that all longs sort in their natural order, as
+ * they would sort when using signed longs comparison.
+ *
+ *
Serialization Format
+ * All longs are serialized to an 8-byte, fixed-width sortable byte format.
+ * Serialization is performed by inverting the long sign bit and writing the
+ * resulting bytes to the byte array in big endian order.
+ *
+ *
NULL
+ * Like all fixed-width integer types, this class does NOT support null
+ * value types. If you need null support use {@link LongWritableRowKey}.
+ *
+ *
Descending sort
+ * To sort in descending order we perform the same encodings as in ascending
+ * sort, except we logically invert (take the 1's complement of) each byte.
+ *
+ *
Usage
+ * This is the fastest class for storing fixed width 64-bit ints. Use
+ * {@link LongWritableRowKey} for a more compact, variable-length representation
+ * in almost all cases. This format is only more compact if longs most
+ * frequently require 59 or more bits to store (including the sign bit).
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedLongWritableRowKey extends RowKey
+{
+  /** Scratch writable re-used across deserializations to avoid garbage. */
+  private LongWritable lw;
+
+  @Override
+  public Class<?> getSerializedClass() { return LongWritable.class; }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return Bytes.SIZEOF_LONG;
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w)
+    throws IOException
+  {
+    byte[] b = w.get();
+    int offset = w.getOffset();
+
+    // Flip the sign bit so signed longs sort naturally under unsigned byte
+    // comparison; order.mask() inverts all bits for descending sort.
+    long l = ((LongWritable)o).get();
+    Bytes.putLong(b, offset, l ^ Long.MIN_VALUE ^ order.mask());
+    RowKeyUtils.seek(w, Bytes.SIZEOF_LONG);
+  }
+
+  @Override
+  public void skip(ImmutableBytesWritable w) throws IOException {
+    RowKeyUtils.seek(w, Bytes.SIZEOF_LONG);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    int offset = w.getOffset();
+    byte[] s = w.get();
+
+    // XOR is self-inverse: undo both the sign flip and the order mask.
+    long l = Bytes.toLong(s, offset) ^ Long.MIN_VALUE ^ order.mask();
+    RowKeyUtils.seek(w, Bytes.SIZEOF_LONG);
+
+    if (lw == null)
+      lw = new LongWritable();
+    lw.set(l);
+    return lw;
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntWritableRowKey.java
new file mode 100644
index 0000000..a785863
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntWritableRowKey.java
@@ -0,0 +1,61 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/** Serialize and deserialize unsigned integers into fixed-width, sortable
+ * byte arrays.
+ *
+ *
The serialization and deserialization method are identical to
+ * {@link FixedIntWritableRowKey}, except that the sign bit of the integer is
+ * not negated during serialization.
+ *
+ *
Usage
+ * This is the fastest class for storing fixed width 32-bit unsigned ints. Use
+ * {@link UnsignedIntWritableRowKey} for a more compact, variable-length
+ * representation. This format is more compact only if integers most
+ * frequently require 28 bits or more bits to store.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedUnsignedIntWritableRowKey extends FixedIntWritableRowKey
+{
+  /** Flips the sign bit in place and returns the same writable. */
+  protected IntWritable invertSign(IntWritable iw) {
+    iw.set(iw.get() ^ Integer.MIN_VALUE);
+    return iw;
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+    // Temporarily flip the caller's sign bit so the signed serializer
+    // produces unsigned ordering; restore it even if serialization throws,
+    // so the caller's IntWritable is never left corrupted.
+    invertSign((IntWritable)o);
+    try {
+      super.serialize(o, w);
+    } finally {
+      invertSign((IntWritable)o);
+    }
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    // Undo the sign flip applied by the signed parent's decode.
+    return invertSign((IntWritable) super.deserialize(w));
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntegerRowKey.java
new file mode 100644
index 0000000..9a21fd1
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedIntegerRowKey.java
@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/** Serialize and deserialize Unsigned Integer Objects into a fixed-length
+ * sortable byte array representation.
+ *
+ *
The serialization and deserialization method are identical to
+ * {@link FixedUnsignedIntWritableRowKey} after converting the IntWritable
+ * to/from an Integer.
+ *
+ *
Usage
+ * This is the slower class for storing unsigned ints. Only one copy is made
+ * when serializing and deserializing, but unfortunately Integer objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive ints are first passed to
+ * {@link Integer#valueOf}, so boxed Integer values may be shared if the
+ * valueOf method has frequent cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedUnsignedIntegerRowKey extends FixedUnsignedIntWritableRowKey
+{
+ private IntWritable iw;
+
+ @Override
+ public Class> getSerializedClass() { return Integer.class; }
+
+ protected Object toIntWritable(Object o) {
+ if (o == null || o instanceof IntWritable)
+ return o;
+ if (iw == null)
+ iw = new IntWritable();
+ iw.set((Integer)o);
+ return iw;
+ }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ return super.getSerializedLength(toIntWritable(o));
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+ super.serialize(toIntWritable(o), w);
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ IntWritable iw = (IntWritable) super.deserialize(w);
+ if (iw == null)
+ return iw;
+
+ return Integer.valueOf(iw.get());
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongRowKey.java
new file mode 100644
index 0000000..564a366
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongRowKey.java
@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/** Serialize and deserialize Unsigned Long Objects into a fixed-length
+ * sortable byte array representation.
+ *
+ *
The serialization and deserialization methods are identical to
+ * {@link FixedUnsignedLongWritableRowKey} after converting the LongWritable
+ * to/from a Long
.
+ *
+ *
Usage
+ * This is the slower class for storing unsigned longs. One copy is made when
+ * serializing and deserializing. Unfortunately Long objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive longs are first passed to
+ * {@link Long#valueOf}, so boxed Long values may be shared if the
+ * valueOf method has frequent cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedUnsignedLongRowKey extends FixedUnsignedLongWritableRowKey
+{
+ private LongWritable lw;
+
+ @Override
+ public Class> getSerializedClass() { return Long.class; }
+
+ protected Object toLongWritable(Object o) {
+ if (o == null || o instanceof LongWritable)
+ return o;
+ if (lw == null)
+ lw = new LongWritable();
+ lw.set((Long)o);
+ return lw;
+ }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ return super.getSerializedLength(toLongWritable(o));
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+ super.serialize(toLongWritable(o), w);
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ LongWritable lw = (LongWritable) super.deserialize(w);
+ if (lw == null)
+ return lw;
+
+ return Long.valueOf(lw.get());
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongWritableRowKey.java
new file mode 100644
index 0000000..a62fb88
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FixedUnsignedLongWritableRowKey.java
@@ -0,0 +1,61 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/** Serialize and deserialize unsigned long integers into fixed-width, sortable
+ * byte arrays.
+ *
+ *
The serialization and deserialization method are identical to
+ * {@link FixedLongWritableRowKey}, except the sign bit of the long is not
+ * negated during serialization.
+ *
+ *
Usage
+ * This is the fastest class for storing fixed width 64-bit unsigned ints. Use
+ * {@link UnsignedLongWritableRowKey} for a more compact, variable-length
+ * representation. This format is more compact only if integers most frequently
+ * require 59 or more bits to store.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FixedUnsignedLongWritableRowKey extends FixedLongWritableRowKey
+{
+ protected LongWritable invertSign(LongWritable lw) {
+ lw.set(lw.get() ^ Long.MIN_VALUE);
+ return lw;
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+ invertSign((LongWritable)o);
+ super.serialize(o, w);
+ invertSign((LongWritable)o);
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ return invertSign((LongWritable) super.deserialize(w));
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatRowKey.java
new file mode 100644
index 0000000..bac9e0b
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatRowKey.java
@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.FloatWritable;
+
+/** Serializes and deserializes Floats into a sortable byte array
+ * representation.
+ *
+ *
The serialization and deserialization method are identical to
+ * {@link FloatWritableRowKey} after converting the FloatWritable to/from a
+ * Float.
+ *
+ *
Usage
+ * This is the slower class for storing floats. No copies are made when
+ * serializing and deserializing, but unfortunately Float objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive floats are first passed to
+ * {@link Float#valueOf}, so boxed Float values may be shared if the
+ * valueOf method has frequent cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FloatRowKey extends FloatWritableRowKey
+{
+ private FloatWritable fw;
+
+ @Override
+ public Class> getSerializedClass() { return Float.class; }
+
+ protected Object toFloatWritable(Object o) {
+ if (o == null || o instanceof FloatWritable)
+ return o;
+ if (fw == null)
+ fw = new FloatWritable();
+ fw.set((Float)o);
+ return fw;
+ }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ return super.getSerializedLength(toFloatWritable(o));
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+ super.serialize(toFloatWritable(o), w);
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ FloatWritable fw = (FloatWritable) super.deserialize(w);
+ if (fw == null)
+ return fw;
+
+ return Float.valueOf(fw.get());
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatWritableRowKey.java
new file mode 100644
index 0000000..cbce499
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/FloatWritableRowKey.java
@@ -0,0 +1,172 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.FloatWritable;
+
+/** Serializes and deserializes FloatWritables into a sortable byte array
+ * representation.
+ *
+ *
This format ensures the following total ordering of floating point values:
+ * NULL < Float.NEGATIVE_INFINITY < -Float.MAX_VALUE < ...
+ * < -Float.MIN_VALUE < -0.0 < +0.0; < Float.MIN_VALUE < ...
+ * < Float.MAX_VALUE < Float.POSITIVE_INFINITY < Float.NaN
+ *
+ *
Serialization Format
+ *
Floating point numbers are encoded as specified in IEEE 754. A 32-bit
+ * single precision float consists of a sign bit, 8-bit unsigned exponent
+ * encoded in offset-127 notation, and a 23-bit significand. The format is
+ * described further in the
+ * Single Precision Floating Point Wikipedia page
+ *
+ *
The value of a normal float is
+ * -1 sign bit × 2exponent - 127
+ * × 1.significand
Thus, we need only ensure that negative numbers sort in the the exact
+ * opposite order as positive numbers (so that say, negative infinity is less
+ * than negative 1), and that all negative numbers compare less than any
+ * positive number. To accomplish this, we invert the sign bit of all floating
+ * point numbers, and we also invert the exponent and significand bits if the
+ * floating point number was negative.
+ *
+ *
More specifically, we first store the floating point bits into a 32-bit
+ * int j using {@link Float#floatToIntBits}. This method
+ * collapses all NaNs into a single, canonical NaN value but otherwise leaves
+ * the bits unchanged. We then compute
which inverts the sign bit and XOR's all other bits with the sign bit
+ * itself. Comparing the raw bytes of j in most significant byte
+ * order is equivalent to performing a single precision floating point
+ * comparison on the underlying bits (ignoring NaN comparisons, as NaNs don't
+ * compare equal to anything when performing floating point comparisons).
+ *
+ *
Finally, we must encode NULL efficiently. Fortunately, j can
+ * never have all of its bits set to one (equivalent to -1 signed in two's
+ * complement) as this value corresponds to a NaN removed during NaN
+ * canonicalization. Thus, we can encode NULL as zero, and all non-NULL
+ * numbers are translated to an int as specified above and then incremented by
+ * 1, which is guaranteed not to cause unsigned overflow as j must
+ * have at least one bit set to zero.
+ *
+ *
The resulting integer is then converted into a byte array by
+ * serializing the integer one byte at a time in most significant byte order.
+ * All serialized values are 4 bytes in length
+ *
+ *
Descending sort
+ * To sort in descending order we perform the same encodings as in ascending
+ * sort, except we logically invert (take the 1's complement of) each byte.
+ *
+ *
Implicit Termination
+ * If {@link #termination} is false and the sort order is ascending, we can
+ * encode NULL values as a zero-length byte array instead of using the 8 byte
+ * encoding specified above. Implicit termination is discussed further in
+ * {@link RowKey}.
+ *
+ *
Usage
+ * This is the fastest class for storing floats. It performs no object copies
+ * during serialization and deserialization.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class FloatWritableRowKey extends RowKey
+{
+ private static final int NULL = 0;
+ private FloatWritable fw;
+
+ @Override
+ public Class> getSerializedClass() { return FloatWritable.class; }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ if (o == null && !terminate())
+ return 0;
+ return Bytes.SIZEOF_INT;
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w)
+ throws IOException
+ {
+ byte[] b = w.get();
+ int offset = w.getOffset();
+ int j;
+
+ if (o == null) {
+ if (!terminate())
+ return;
+ j = NULL;
+ } else {
+ j = Float.floatToIntBits(((FloatWritable)o).get());
+ j = (j ^ ((j >> Integer.SIZE - 1) | Integer.MIN_VALUE)) + 1;
+ }
+
+ Bytes.putInt(b, offset, j ^ order.mask());
+ RowKeyUtils.seek(w, Bytes.SIZEOF_INT);
+ }
+
+ @Override
+ public void skip(ImmutableBytesWritable w) throws IOException {
+ if (w.getLength() <= 0)
+ return;
+ RowKeyUtils.seek(w, Bytes.SIZEOF_INT);
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ byte[] s = w.get();
+ int offset = w.getOffset();
+ if (w.getLength() <= 0)
+ return null;
+
+ try {
+ int j = Bytes.toInt(s, offset) ^ order.mask();
+
+ if (j == NULL)
+ return null;
+
+ if (fw == null)
+ fw = new FloatWritable();
+
+ j--;
+ j ^= (~j >> Integer.SIZE - 1) | Integer.MIN_VALUE;
+ fw.set(Float.intBitsToFloat(j));
+ return fw;
+ } finally {
+ RowKeyUtils.seek(w, Bytes.SIZEOF_INT);
+ }
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntWritableRowKey.java
new file mode 100644
index 0000000..74830fc
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntWritableRowKey.java
@@ -0,0 +1,161 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Writable;
+
+/** Serialize and deserialize signed, two's complement integers into a
+ * variable-length sortable byte format.
+ *
+ *
This format ensures that serialized values will sort in their natural
+ * sort order, as determined by (signed) integer comparison. NULL
+ * values compare less than any non-NULL value.
+ *
+ *
Serialization Format
+ * This variable-length format is a subclass of @{link AbstractVarIntRowKey}.
+ * The JavaDoc page for the parent class describes the high-level design of the
+ * general serialization format. The basic idea is to only encode only those
+ * bits that have values differing from the (explicit) sign bit.
+ *
+ *
Our encoding consists of a header byte followed by 0-4 data bytes. The
+ * data bytes are packed 8-bit data values in big-endian order. The header byte
+ * contains the sign bit, the number of data bytes, and the 3-6 most significant
+ * bits of data.
+ *
+ *
The header byte contains both header fields (sign, length) and data. Some
+ * header length fields may be omitted in shorter-length encodings, so smaller
+ * encodings contain more data bits in the header. In the case of single-byte
+ * encodings, the header byte contains 6 bits of data. For double-byte
+ * encodings, the header byte contains contains and 5 bits of data. All other
+ * encoding lengths contain 3 bits of data.
+ *
+ *
Thus we encode all numbers in two's complement using the sign bit in the
+ * header and 2H+D data bits, where H is the number of data bits in
+ * the header byte and D is the number of data bits in the data bytes
+ * (D = number of data bytes × 8).
+ *
+ *
More specifically, the numerical ranges for our variable-length byte
+ * encoding are:
+ *
+ * We support all values that can be represented in a java Integer, so N ≤ 5.
+ *
+ *
+ *
Reserved Bits
+ * Up to three of the most significant bits in the header may be reserved for
+ * use by the application, as three is the minimum number of data bits in the
+ * header byte. Reserved bits decrease the amount of data stored in the header
+ * byte, For example, a single byte encoding with two reserved bits can only encode
+ * integers in the range -16 ≤ x ≤ 15.
+ *
+ *
+ *
+ * The full format of the header byte is
+ *
+ *
+ * Bit 7: negSign
+ * Bit 6: single-byte encoded ^ negSign
+ * Bit 5: double-byte encoded ^ negSign
+ * Bits 3-4: len ^ sign (each bit XOR'd with original, unnegated sign bit)
+ *
+ *
+ *
Bits 6 and 7 are used in all encodings. If bit 6 indicates a single byte
+ * encodng, then bits 0-5 are all data bits. Otherwise, bit 5 is used to
+ * indicate a double byte encoding. If a double byte encoding is used, then
+ * bits 0-4 are data bits. Otherwise, bits 3-4 specify the length of the
+ * extended length (> 2 byte) encoding. In all cases, bits 0-2 are data bits.
+ *
+ *
+ *
The len field represents the (extended) length of the encoded byte array
+ * minus 3, as all extended length serializations must be at least 3 bytes long.
+ * In other words, the encoded len field has a bias of +3, so an encoded
+ * field with value 1 represents a length of 4 bytes when decoded.
+ * The XOR's with sign and negSign are required to preserve sort ordering when
+ * using a big-endian byte array comparator to sort the encoded values.
+ *
+ *
Any padding is done with the sign bit. The worst case space overhead of
+ * this serialization format versus a standard fixed-length encoding is 1 additional
+ * byte. Note that if reserved bits are present, the above header layout is
+ * shifted right by the number of reserved bits.
+ *
+ *
Usage
+ * This is the fastest class for storing signed integers. It performs no
+ * copies during serialization and deserialization,
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class IntWritableRowKey extends AbstractVarIntRowKey
+{
+ /** Header flags */
+ protected static final byte INT_SIGN = (byte) 0x80;
+ protected static final byte INT_SINGLE = (byte) 0x40;
+ protected static final byte INT_DOUBLE = (byte) 0x20;
+
+ /** Header data bits for each header type */
+ protected static final int INT_SINGLE_DATA_BITS = 0x6;
+ protected static final int INT_DOUBLE_DATA_BITS = 0x5;
+ protected static final int INT_EXT_DATA_BITS = 0x3;
+
+ /** Extended (3-9) byte length attributes */
+ /** Number of bits in the length field */
+ protected static final int INT_EXT_LENGTH_BITS = 0x2;
+
+ public IntWritableRowKey() {
+ super(INT_SINGLE, INT_SINGLE_DATA_BITS, INT_DOUBLE,
+ INT_DOUBLE_DATA_BITS, INT_EXT_LENGTH_BITS,
+ INT_EXT_DATA_BITS);
+ }
+
+ @Override
+ public Class> getSerializedClass() { return IntWritable.class; }
+
+ @Override
+ Writable createWritable() { return new IntWritable(); }
+
+ @Override
+ void setWritable(long x, Writable w) { ((IntWritable)w).set((int)x); }
+
+ @Override
+ long getWritable(Writable w) { return ((IntWritable)w).get(); }
+
+ @Override
+ long getSign(long l) { return l & Long.MIN_VALUE; }
+
+ @Override
+ protected byte initHeader(boolean sign) {
+ return sign ? 0 : INT_SIGN; /* sign bit is negated in header */
+ }
+
+ @Override
+ protected byte getSign(byte h) {
+ return (h & INT_SIGN) != 0 ? 0 : Byte.MIN_VALUE;
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntegerRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntegerRowKey.java
new file mode 100644
index 0000000..5c54837
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/IntegerRowKey.java
@@ -0,0 +1,79 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/** Serializes and deserializes Integer objects into a variable-length
+ * sortable byte array representation.
+ *
+ *
The serialization and deserialization method are
+ * identical to {@link IntWritableRowKey} after converting the IntWritable
+ * to/from an Integer
.
+ *
+ *
Usage
+ * This is the slower class for storing ints. No copies are made when
+ * serializing and deserializing. Unfortunately Integer objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive ints are first passed to
+ * {@link Integer#valueOf}, so boxed Integer values may be shared if the
+ * valueOf method has frequent cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class IntegerRowKey extends IntWritableRowKey
+{
+ private IntWritable iw;
+
+ @Override
+ public Class> getSerializedClass() { return Integer.class; }
+
+ protected Object toIntWritable(Object o) {
+ if (o == null || o instanceof IntWritable)
+ return o;
+ if (iw == null)
+ iw = new IntWritable();
+ iw.set((Integer)o);
+ return iw;
+ }
+
+ @Override
+ public int getSerializedLength(Object o) throws IOException {
+ return super.getSerializedLength(toIntWritable(o));
+ }
+
+ @Override
+ public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+ super.serialize(toIntWritable(o), w);
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ IntWritable iw = (IntWritable) super.deserialize(w);
+ if (iw == null)
+ return iw;
+
+ return Integer.valueOf(iw.get());
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LazyBigDecimalRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LazyBigDecimalRowKey.java
new file mode 100644
index 0000000..6496e9a
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LazyBigDecimalRowKey.java
@@ -0,0 +1,81 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/** Serializes and deserializes {@link BigDecimal} Objects into a sortable byte
+ * array representation.
+ *
+ *
This class allows BigDecimal
+ * objects to be lazily deserialized, so that objects are allocated only
+ * when required. The serialization method is identical
+ * to {@link BigDecimalRowKey}. The deserialization methods returns
+ * an ImmutableBytesWritable object containing the raw serialized
+ * bytes. A separate method, {@link #getBigDecimal} (identical to
+ * {@link BigDecimalRowKey#deserialize}) is used to fully deserialize
+ * this byte array lazily on demand.
+ *
+ *
Usage
+ * This class is potentially faster than BigDecimalRowKey as
+ * deserialization is performed lazily. If some values do not have to be fully
+ * deserialized, then the client will not pay the object allocation and parsing
+ * costs for these values. If all values are fully deserialized, then this class
+ * provides no benefits.
+ *
+ *
Two copies are made when serializing and three when fully deserializing.
+ * If full deserialization is not required, then no copies are performed.
+ * Unfortunately BigDecimal objects are immutable, and cannot be re-used across
+ * multiple calls to the getBigDecimal method.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class LazyBigDecimalRowKey extends BigDecimalRowKey
+{
+ private ImmutableBytesWritable rawBytes;
+
+ @Override
+ public Class> getDeserializedClass() {
+ return ImmutableBytesWritable.class;
+ }
+
+ @Override
+ public Object deserialize(ImmutableBytesWritable w) throws IOException {
+ if (rawBytes == null)
+ rawBytes = new ImmutableBytesWritable();
+
+ rawBytes.set(w.get(), w.getOffset(), w.getLength());
+ super.skip(w);
+ return rawBytes;
+ }
+
+ /** Gets the BigDecimal stored in the current position of the
+ * byte array. After this method is called, the position (length) of the byte
+ * array will be incremented (decremented) by the length of the serialized
+ * BigDecimal.
+ */
+ public BigDecimal getBigDecimal(ImmutableBytesWritable w) throws IOException {
+ return (BigDecimal)super.deserialize(w);
+ }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongRowKey.java
new file mode 100644
index 0000000..9365525
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongRowKey.java
@@ -0,0 +1,80 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/** Serializes and deserializes Long objects into a variable-length
+ * sortable byte array representation.
+ *
+ * <p>The serialization and deserialization method are identical to
+ * {@link LongWritableRowKey} after converting the LongWritable to/from a
+ * Long.
+ *
+ * <h1>Usage</h1>
+ * This is the slower class for storing longs. No copies are made when
+ * serializing and deserializing. Unfortunately Long objects are
+ * immutable and thus cannot be re-used across multiple deserializations.
+ * However, deserialized primitive longs are first passed to
+ * {@link Long#valueOf}, so boxed Long values may be shared, reducing the
+ * copies on deserialization, if the valueOf method has frequent
+ * cache hits.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class LongRowKey extends LongWritableRowKey
+{
+  /** Scratch LongWritable re-used across serializations to avoid a
+   * per-call allocation. */
+  private LongWritable lw;
+
+  @Override
+  public Class<?> getSerializedClass() { return Long.class; }
+
+  /** Wraps a Long in the (re-used) scratch LongWritable; passes through
+   * null and objects that are already LongWritable. */
+  protected Object toLongWritable(Object o) {
+    if (o == null || o instanceof LongWritable)
+      return o;
+    if (lw == null)
+      lw = new LongWritable();
+    lw.set((Long)o);
+    return lw;
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return super.getSerializedLength(toLongWritable(o));
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w) throws IOException {
+    super.serialize(toLongWritable(o), w);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    /* Local renamed from "lw" so it no longer shadows the scratch field. */
+    LongWritable value = (LongWritable) super.deserialize(w);
+    if (value == null)
+      return null;
+
+    return Long.valueOf(value.get());
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongWritableRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongWritableRowKey.java
new file mode 100644
index 0000000..b67f0ee
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/LongWritableRowKey.java
@@ -0,0 +1,160 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hbase.util.orderly;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+
+/** Serialize and deserialize signed, two's complement long integers into a
+ * variable-length sortable byte format.
+ *
+ * <p>This format ensures that serialized values will sort in their natural
+ * sort order, as determined by (signed) long integer comparison. NULL
+ * values compare less than any non-NULL value.
+ *
+ * <h1>Serialization Format</h1>
+ * This variable-length format is a subclass of {@link AbstractVarIntRowKey}.
+ * The JavaDoc page for the parent class describes the high-level design of the
+ * general serialization format. The basic idea is to encode only those
+ * bits that have values differing from the (explicit) sign bit.
+ *
+ * <p>Our encoding consists of a header byte followed by 0-8 data bytes. The
+ * data bytes are packed 8-bit data values in big-endian order. The header byte
+ * contains the sign bit, the number of data bytes, and the 2-6 most significant
+ * bits of data.
+ *
+ * <p>The header byte contains both header fields (sign, length) and data. Some
+ * header length fields may be omitted in shorter-length encodings, so smaller
+ * encodings contain more data bits in the header. In the case of single-byte
+ * encodings, the header byte contains 6 bits of data. For double-byte
+ * encodings, the header byte contains 5 bits of data. All other
+ * encoding lengths contain 2 bits of data.
+ *
+ * <p>Thus we encode all numbers in two's complement using the sign bit in the
+ * header and 2H+D data bits, where H is the number of data bits in
+ * the header byte and D is the number of data bits in the data bytes
+ * (D = number of data bytes × 8).
+ *
+ * <p>More specifically, the numerical ranges for our variable-length byte
+ * encoding are:
+ * <!-- NOTE(review): the per-length range list appears to have been lost
+ *      during extraction; restore it from the upstream documentation. -->
+ * We support all values that can be represented in a java Long, so N ≤ 9.
+ *
+ * <h1>Reserved Bits</h1>
+ * Up to two of the most significant bits in the header may be reserved for use
+ * by the application, as two is the minimum number of data bits in the header
+ * byte. Reserved bits decrease the amount of data stored in the header byte.
+ * For example, a single byte encoding with two reserved bits can only encode
+ * integers in the range -16 ≤ x ≤ 15.
+ *
+ * <p>The full format of the header byte is:
+ * <ul>
+ *   <li>Bit 7: negSign</li>
+ *   <li>Bit 6: single-byte encoded ^ negSign</li>
+ *   <li>Bit 5: double-byte encoded ^ negSign</li>
+ *   <li>Bits 2-4: len ^ sign (each bit XOR'd with original, unnegated sign
+ *       bit)</li>
+ * </ul>
+ *
+ * <p>Bits 6 and 7 are used in all encodings. If bit 6 indicates a single byte
+ * encoding, then bits 0-5 are all data bits. Otherwise, bit 5 is used to
+ * indicate a double byte encoding. If a double byte encoding is used, then
+ * bits 0-4 are data bits. Otherwise, bits 2-4 specify the length of the
+ * extended length (&gt; 2 byte) encoding. In all cases, bits 0-1 are data bits.
+ *
+ * <p>The len field represents the (extended) length of the encoded byte array
+ * minus 3, as all extended length serializations must be at least 3 bytes long.
+ * In other words, the encoded len field has a bias of +3, so an encoded
+ * field with value 1 represents a length of 4 bytes when decoded.
+ * The XOR's with sign and negSign are required to preserve sort ordering when
+ * using a big-endian byte array comparator to sort the encoded values.
+ *
+ * <p>Any padding is done with the sign bit. The worst case space overhead of
+ * this serialization format versus a standard fixed-length encoding is 1
+ * additional byte. Note that if reserved bits are present, the above header
+ * layout is shifted right by the number of reserved bits.
+ *
+ * <h1>Usage</h1>
+ * This is the fastest class for storing signed long integers. It performs no
+ * copies during serialization and deserialization.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class LongWritableRowKey extends AbstractVarIntRowKey
+{
+  /** Header flags */
+  protected static final byte LONG_SIGN = (byte) 0x80;
+  protected static final byte LONG_SINGLE = (byte) 0x40;
+  protected static final byte LONG_DOUBLE = (byte) 0x20;
+
+  /** Header data bits for each header type */
+  protected static final int LONG_SINGLE_DATA_BITS = 0x6;
+  protected static final int LONG_DOUBLE_DATA_BITS = 0x5;
+  protected static final int LONG_EXT_DATA_BITS = 0x2;
+
+  /** Extended (3-9) byte length attributes: number of bits in the length
+   * field. */
+  protected static final int LONG_EXT_LENGTH_BITS = 0x3;
+
+  public LongWritableRowKey() {
+    super(LONG_SINGLE, LONG_SINGLE_DATA_BITS, LONG_DOUBLE,
+          LONG_DOUBLE_DATA_BITS, LONG_EXT_LENGTH_BITS,
+          LONG_EXT_DATA_BITS);
+  }
+
+  @Override
+  public Class<?> getSerializedClass() { return LongWritable.class; }
+
+  @Override
+  Writable createWritable() { return new LongWritable(); }
+
+  @Override
+  void setWritable(long x, Writable w) { ((LongWritable)w).set(x); }
+
+  @Override
+  long getWritable(Writable w) { return ((LongWritable)w).get(); }
+
+  /** Returns the sign bit (bit 63) of l, zero for non-negative values. */
+  @Override
+  long getSign(long l) { return l & Long.MIN_VALUE; }
+
+  @Override
+  protected byte initHeader(boolean sign) {
+    return sign ? 0 : LONG_SIGN; /* sign bit is negated in header */
+  }
+
+  /** Decodes the sign from header byte h: because the header stores
+   * the negated sign, a clear sign flag means the value is negative
+   * (Byte.MIN_VALUE, i.e. bit 7 set, is returned). */
+  @Override
+  protected byte getSign(byte h) {
+    return (h & LONG_SIGN) != 0 ? 0 : Byte.MIN_VALUE;
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Order.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Order.java
new file mode 100644
index 0000000..4e849b9
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/Order.java
@@ -0,0 +1,43 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/** The sort order of a row key, ascending or descending. */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public enum Order
+{
+  ASCENDING((byte)0),
+  DESCENDING((byte)0xff);
+
+  /** XOR mask applied to each serialized byte to realize this sort order. */
+  private final byte orderMask;
+
+  Order(byte orderMask) {
+    this.orderMask = orderMask;
+  }
+
+  /** Returns the byte mask associated with the sort order. XOR'ing a
+   * serialized byte with this mask yields the same byte, but sorted in
+   * the direction specified by this Order object.
+   * @see RowKey#serialize
+   */
+  byte mask() {
+    return orderMask;
+  }
+
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKey.java
new file mode 100644
index 0000000..f463c88
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKey.java
@@ -0,0 +1,207 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/** Base class for translating objects to/from sort-order preserving byte
+ * arrays.
+ *
+ * <p>In contrast to other common object serialization methods,
+ * RowKey serializations use a byte array representation that
+ * preserves the object's natural sort ordering. Sorting the raw byte arrays
+ * yields the same sort order as sorting the actual objects themselves, without
+ * requiring the object to be instantiated. Using the serialized byte arrays
+ * as row keys in key-value stores such as HBase will sort rows in the natural
+ * sort order of the object.
+ *
+ * <h1>Key types</h1>
+ * Primitive (single-value) key types are: variable-length signed/unsigned
+ * integers and longs, fixed-width signed/unsigned integers and longs,
+ * float/double, bigdecimal, and utf-8/text/String character strings.
+ *
+ * <p>Composite (multi-value) row key support is provided using struct row keys.
+ * You may have an arbitrary number of fields of any type, and each field
+ * may have its own sort order.
+ *
+ * <h1>Order</h1>
+ * All keys may be sorted in ascending or descending order.
+ *
+ * <h1>NULL</h1>
+ * Most keys support NULL values (only fixed-width integer/long types do not).
+ * All keys with NULL support treat the NULL value as comparing less than any
+ * non-NULL value for sort ordering purposes.
+ *
+ * <h1>Termination</h1>
+ * Some row keys, such as character strings, require an explicit termination
+ * byte during serialization to indicate the end of the serialized value.
+ * This terminator byte can be omitted in some situations, such as during an
+ * ascending sort where the only serialized bytes come from the string row key.
+ * Omitting the explicit terminator byte is known as implicit termination,
+ * because the end of the serialized byte array implicitly terminates the
+ * serialized value. The {@link #setTermination} method can be used to
+ * control when termination is required.
+ *
+ * <p>If a row key is not forced to terminate, then during deserialization it
+ * will read bytes up until the end of the serialized byte array. This is safe
+ * if the row key serialized all of the bytes up to the end of the byte array
+ * (which is the common case). However, if the user has created a custom
+ * serialized format where their own extra bytes are appended to the byte array,
+ * then this would produce incorrect results and explicit termination should
+ * be forced.
+ *
+ * <p>The JavaDoc of each
+ * row key class describes the effects of implicit and explicit termination
+ * of the class's serialization. Note that the termination flag
+ * only affects serialization. For all row key types, deserialization and skip
+ * methods are able to detect values encoded in both implicit and explicit
+ * terminated formats, regardless of what the termination flag
+ * is set to.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public abstract class RowKey
+{
+  /** Sort direction applied to serialized bytes via {@link #mask}. */
+  protected Order order;
+  /** Termination policy; interpreted by {@link #terminate}. */
+  protected Termination termination = Termination.AUTO;
+  /** Scratch wrapper re-used by the byte[]-based convenience methods. */
+  private ImmutableBytesWritable w;
+
+  public RowKey() { this.order = Order.ASCENDING; }
+
+  /** Sets the sort order of the row key - ascending or descending. */
+  public RowKey setOrder(Order order) { this.order = order; return this; }
+
+  /** Gets the sort order of the row key - ascending or descending */
+  public Order getOrder() { return order; }
+
+  /** Returns whether explicit termination in the serialized row key must be
+   * guaranteed in some fashion (such as a terminator byte or a
+   * self-describing length). Otherwise, the end of the byte array may serve
+   * as an implicit terminator. Defaults to "AUTO".
+   */
+  public Termination getTermination() { return termination; }
+
+  /** Sets the termination policy for this row key. Without explicit
+   * termination, the end of the byte array can be used to terminate
+   * encoded values.
+   */
+  public RowKey setTermination(Termination termination) {
+    this.termination = termination;
+    return this;
+  }
+
+  /** Returns true if termination is required */
+  boolean terminate() {
+    switch (termination) {
+      case SHOULD_NOT:
+        return false;
+      case MUST:
+        return true;
+      case AUTO:
+        /* Descending order inverts serialized bytes, so the end of the byte
+         * array can no longer act as an implicit terminator. */
+        return order == Order.DESCENDING;
+      default:
+        throw new IllegalStateException("unknown termination " + termination);
+    }
+  }
+
+  /** Gets the class of the object used for serialization.
+   * @see #serialize
+   */
+  public abstract Class<?> getSerializedClass();
+
+  /** Gets the class of the object used for deserialization.
+   * @see #deserialize
+   */
+  public Class<?> getDeserializedClass() { return getSerializedClass(); }
+
+  /** Gets the length of the byte array when serializing an object.
+   * @param o object to serialize
+   * @return the length of the byte array used to serialize o
+   */
+  public abstract int getSerializedLength(Object o) throws IOException;
+
+  /** Serializes an object o to a byte array. When this
+   * method returns, the byte array's position will be adjusted by the number
+   * of bytes written. The offset (length) of the byte array is incremented
+   * (decremented) by the number of bytes used to serialize o.
+   * @param o object to serialize
+   * @param w byte array used to store the serialized object
+   */
+  public abstract void serialize(Object o, ImmutableBytesWritable w)
+      throws IOException;
+
+  /** Serializes o into byte array b starting at offset 0. */
+  public void serialize(Object o, byte[] b) throws IOException {
+    serialize(o, b, 0);
+  }
+
+  /** Serializes o into byte array b starting at offset. */
+  public void serialize(Object o, byte[] b, int offset) throws IOException {
+    if (w == null)
+      w = new ImmutableBytesWritable();
+    w.set(b, offset, b.length - offset);
+    serialize(o, w);
+  }
+
+  /** Serializes o into a newly-allocated byte array of exactly
+   * {@link #getSerializedLength} bytes. */
+  public byte[] serialize(Object o) throws IOException {
+    byte[] b = new byte[getSerializedLength(o)];
+    serialize(o, b, 0);
+    return b;
+  }
+
+  /** Skips over a serialized key in the byte array. When this
+   * method returns, the byte array's position will be adjusted by the number of
+   * bytes in the serialized key. The offset (length) of the byte array is
+   * incremented (decremented) by the number of bytes in the serialized key.
+   * @param w the byte array containing the serialized key
+   */
+  public abstract void skip(ImmutableBytesWritable w) throws IOException;
+
+  /** Deserializes a key from the byte array. The returned object is an
+   * instance of the class returned by {@link #getDeserializedClass}. When this
+   * method returns, the byte array's position will be adjusted by the number of
+   * bytes in the serialized key. The offset (length) of the byte array is
+   * incremented (decremented) by the number of bytes in the serialized key.
+   * @param w the byte array used for key deserialization
+   * @return the deserialized key from the current position in the byte array
+   */
+  public abstract Object deserialize(ImmutableBytesWritable w)
+      throws IOException;
+
+  /** Deserializes a key from byte array b starting at offset 0. */
+  public Object deserialize(byte[] b) throws IOException {
+    return deserialize(b, 0);
+  }
+
+  /** Deserializes a key from byte array b starting at offset. */
+  public Object deserialize(byte[] b, int offset) throws IOException {
+    if (w == null)
+      w = new ImmutableBytesWritable();
+    w.set(b, offset, b.length - offset);
+    return deserialize(w);
+  }
+
+  /** Orders serialized byte b by XOR'ing it with the sort order mask. This
+   * allows descending sort orders to invert the byte values of the serialized
+   * byte stream.
+   */
+  protected byte mask(byte b) {
+    return (byte) (b ^ order.mask());
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKeyUtils.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKeyUtils.java
new file mode 100644
index 0000000..af0e603
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/RowKeyUtils.java
@@ -0,0 +1,71 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.util.Arrays;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.io.Text;
+
+/** Various utility functions for creating and manipulating row keys. */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class RowKeyUtils
+{
+  /** Shared (immutable) zero-length byte array singleton. */
+  public static final byte[] EMPTY = new byte[0];
+
+  /** Static utility class -- not instantiable. */
+  private RowKeyUtils() { }
+
+  /** Converts a (byte array, offset, length) triple into a byte array,
+   * copying only if necessary. No copy is performed if offset is 0 and
+   * length is array.length.
+   */
+  public static byte[] toBytes(byte[] b, int offset, int length) {
+    if (offset == 0 && length == b.length)
+      return b;
+    else if (offset == 0)
+      return Arrays.copyOf(b, length);
+    return Arrays.copyOfRange(b, offset, offset + length);
+  }
+
+  /** Converts an ImmutableBytesWritable to a byte array, copying only if
+   * necessary.
+   */
+  public static byte[] toBytes(ImmutableBytesWritable w) {
+    return toBytes(w.get(), w.getOffset(), w.getLength());
+  }
+
+  /** Converts a Text object to a byte array, copying only if
+   * necessary. Note that Text's backing array may be longer than its
+   * logical length, so a copy is made whenever the two differ.
+   */
+  public static byte[] toBytes(Text t) {
+    return toBytes(t.getBytes(), 0, t.getLength());
+  }
+
+  /** Seeks forward/backward within an ImmutableBytesWritable. After
+   * seek is complete, the position (length) of the byte array is
+   * incremented (decremented) by the seek amount.
+   * @param w immutable byte array used for seek
+   * @param offset number of bytes to seek (relative to current position)
+   */
+  public static void seek(ImmutableBytesWritable w, int offset) {
+    w.set(w.get(), w.getOffset() + offset, w.getLength() - offset);
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StringRowKey.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StringRowKey.java
new file mode 100644
index 0000000..127d1b0
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StringRowKey.java
@@ -0,0 +1,71 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/** Serialize and deserialize Java Strings into row keys.
+ * The serialization and deserialization method are identical to
+ * {@link UTF8RowKey} after converting the Java String to/from a UTF-8 byte
+ * array.
+ *
+ * <h1>Usage</h1>
+ * This is the slowest class for storing characters and strings. One copy is
+ * made during serialization/deserialization, and furthermore the String
+ * objects themselves cannot be re-used across multiple deserializations.
+ * Weigh the cost of additional object instantiation
+ * and copying against the benefits of being able to use all of the various
+ * handy and tidy String functions in Java.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class StringRowKey extends UTF8RowKey
+{
+  @Override
+  public Class<?> getSerializedClass() { return String.class; }
+
+  /** Converts a String to its UTF-8 byte encoding; passes through null
+   * and values that are already byte[]. */
+  protected Object toUTF8(Object o) {
+    if (o == null || o instanceof byte[])
+      return o;
+    return Bytes.toBytes((String)o);
+  }
+
+  @Override
+  public int getSerializedLength(Object o) throws IOException {
+    return super.getSerializedLength(toUTF8(o));
+  }
+
+  @Override
+  public void serialize(Object o, ImmutableBytesWritable w)
+      throws IOException
+  {
+    super.serialize(toUTF8(o), w);
+  }
+
+  @Override
+  public Object deserialize(ImmutableBytesWritable w) throws IOException {
+    byte[] b = (byte[]) super.deserialize(w);
+    return b == null ? null : Bytes.toString(b);
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructBuilder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructBuilder.java
new file mode 100644
index 0000000..7eb381c
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructBuilder.java
@@ -0,0 +1,86 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+
+/** Builds {@link StructRowKey} objects. */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class StructBuilder
+{
+  /* Generic type arguments restored below -- the raw List/ArrayList types
+   * appear to be extraction damage (stripped angle brackets). */
+  protected List<RowKey> fields;
+  protected Order order;
+
+  public StructBuilder() {
+    this.fields = new ArrayList<RowKey>();
+    this.order = Order.ASCENDING;
+  }
+
+  /** Adds a field row key to the struct definition.
+   * @param key field row key to append to the struct definition
+   * @return this object
+   */
+  public StructBuilder add(RowKey key) { fields.add(key); return this; }
+
+  /** Sets a struct field to the specified row key. Fields are numbered
+   * sequentially in the order they are added, starting from 0.
+   * @param i struct field definition index
+   * @param key row key assigned to field definition
+   * @return this object
+   */
+  public StructBuilder set(int i, RowKey key) {
+    fields.set(i, key);
+    return this;
+  }
+
+  /** Gets the field row key at field index i. */
+  public RowKey get(int i) { return fields.get(i); }
+
+  /** Gets all field row keys. */
+  public List<RowKey> getFields() { return fields; }
+
+  /** Sets the sort order of the struct. Default is ascending. */
+  public StructBuilder setOrder(Order order) {
+    this.order = order;
+    return this;
+  }
+
+  /** Gets the sort order of the struct definition. */
+  public Order getOrder() { return order; }
+
+  /** Creates a struct row key from the current field list and sort order. */
+  public StructRowKey toRowKey() {
+    RowKey[] fields = this.fields.toArray(new RowKey[0]);
+    return (StructRowKey) new StructRowKey(fields).setOrder(order);
+  }
+
+  /** Resets the struct builder. Removes all fields, sets sort order to
+   * ascending.
+   */
+  public StructBuilder reset() {
+    fields.clear();
+    order = Order.ASCENDING;
+    return this;
+  }
+}
diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructIterator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructIterator.java
new file mode 100644
index 0000000..7c08482
--- /dev/null
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/orderly/StructIterator.java
@@ -0,0 +1,127 @@
+/* Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hbase.util.orderly;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+
+/** Iterates over a serialized {@link StructRowKey}. This iterates over each
+ * serialized row key field from the specified struct definition, and for each
+ * field allows you to deserialize the field or skip over its serialized bytes.
+ * In contrast, the methods provided by StructRowKey,
+ * {@link StructRowKey#deserialize} and {@link StructRowKey#skip},
+ * deserialize or skip the entire struct at once (respectively).
+ *
+ *
+ * <p>A deserialized field has the same type as the field row key's deserialized
+ * class (specified by {@link RowKey#getDeserializedClass}). This iterator may
+ * also be used to deserialize bytes from any StructRowKey for
+ * which the specified row key is a prefix. For example, if the specified
+ * struct row key definition has a long and an integer field, you may parse the
+ * serialized output of a struct whose fields are a long, an integer, and a
+ * string. The iterator would return a long followed by an integer, and the
+ * trailing string would be ignored.
+ */
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
+public class StructIterator implements Iterator